Merge "Support controlling some unigram flags in addUnigramWord."
This commit is contained in:
commit
5b0ca975c4
9 changed files with 108 additions and 53 deletions
|
@ -301,11 +301,12 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
|
||||||
int codePoints[wordLength];
|
int codePoints[wordLength];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
||||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
||||||
int shortcutTargetCodePoints[wordLength];
|
int shortcutTargetCodePoints[shortcutLength];
|
||||||
if (shortcutTarget) {
|
if (shortcutTarget) {
|
||||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
||||||
}
|
}
|
||||||
dictionary->addUnigramWord(codePoints, wordLength, probability, timestamp);
|
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
|
||||||
|
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
|
@ -356,7 +357,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
|
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
|
||||||
env->DeleteLocalRef(languageModelParam);
|
env->DeleteLocalRef(languageModelParam);
|
||||||
|
|
||||||
// TODO: Support shortcut and flags.
|
|
||||||
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
|
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
|
||||||
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
|
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
|
||||||
jfieldID unigramProbabilityFieldId =
|
jfieldID unigramProbabilityFieldId =
|
||||||
|
@ -365,6 +365,14 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
|
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
|
||||||
jfieldID timestampFieldId =
|
jfieldID timestampFieldId =
|
||||||
env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
|
env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
|
||||||
|
jfieldID shortcutTargetFieldId =
|
||||||
|
env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
|
||||||
|
jfieldID shortcutProbabilityFieldId =
|
||||||
|
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
|
||||||
|
jfieldID isNotAWordFieldId =
|
||||||
|
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
|
||||||
|
jfieldID isBlacklistedFieldId =
|
||||||
|
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
|
||||||
env->DeleteLocalRef(languageModelParamClass);
|
env->DeleteLocalRef(languageModelParamClass);
|
||||||
|
|
||||||
for (int i = startIndex; i < languageModelParamCount; ++i) {
|
for (int i = startIndex; i < languageModelParamCount; ++i) {
|
||||||
|
@ -386,7 +394,19 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||||
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
|
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
|
||||||
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
|
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
|
||||||
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, timestamp);
|
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
|
||||||
|
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
|
||||||
|
jintArray shortcutTarget = static_cast<jintArray>(
|
||||||
|
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
|
||||||
|
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
||||||
|
int shortcutTargetCodePoints[shortcutLength];
|
||||||
|
if (shortcutTarget) {
|
||||||
|
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
||||||
|
}
|
||||||
|
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||||
|
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
|
||||||
|
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
|
||||||
|
isNotAWord, isBlacklisted, timestamp);
|
||||||
if (word0) {
|
if (word0) {
|
||||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||||
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
||||||
|
@ -397,6 +417,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
}
|
}
|
||||||
env->DeleteLocalRef(word0);
|
env->DeleteLocalRef(word0);
|
||||||
env->DeleteLocalRef(word1);
|
env->DeleteLocalRef(word1);
|
||||||
|
env->DeleteLocalRef(shortcutTarget);
|
||||||
env->DeleteLocalRef(languageModelParam);
|
env->DeleteLocalRef(languageModelParam);
|
||||||
}
|
}
|
||||||
return languageModelParamCount;
|
return languageModelParamCount;
|
||||||
|
|
|
@ -99,10 +99,13 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
|
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
|
||||||
|
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||||
|
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||||
const int timestamp) {
|
const int timestamp) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
|
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
|
||||||
timestamp);
|
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
|
||||||
|
isBlacklisted, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
|
|
|
@ -73,6 +73,8 @@ class Dictionary {
|
||||||
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
||||||
|
|
||||||
void addUnigramWord(const int *const word, const int length, const int probability,
|
void addUnigramWord(const int *const word, const int length, const int probability,
|
||||||
|
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||||
|
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||||
const int timestamp);
|
const int timestamp);
|
||||||
|
|
||||||
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
|
|
|
@ -67,7 +67,9 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool addUnigramWord(const int *const word, const int length,
|
virtual bool addUnigramWord(const int *const word, const int length,
|
||||||
const int probability, const int timestamp) = 0;
|
const int probability, const int *const shortcutTargetCodePoints,
|
||||||
|
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
|
||||||
|
const bool isBlacklisted,const int timestamp) = 0;
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
|
|
|
@ -77,6 +77,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||||
|
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||||
|
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||||
const int timestamp) {
|
const int timestamp) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
|
|
|
@ -30,7 +30,8 @@ const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
||||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
const int *const wordCodePoints, const int codePointCount, const int probability,
|
||||||
const int timestamp, bool *const outAddedNewUnigram) {
|
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
|
||||||
|
bool *const outAddedNewUnigram) {
|
||||||
int parentPos = NOT_A_DICT_POS;
|
int parentPos = NOT_A_DICT_POS;
|
||||||
while (!readingHelper->isEnd()) {
|
while (!readingHelper->isEnd()) {
|
||||||
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
||||||
|
@ -52,18 +53,20 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
||||||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||||
wordCodePoints[matchedCodePointCount + j])) {
|
wordCodePoints[matchedCodePointCount + j])) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability, timestamp,
|
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
|
||||||
wordCodePoints + matchedCodePointCount,
|
probability, timestamp, wordCodePoints + matchedCodePointCount,
|
||||||
codePointCount - matchedCodePointCount);
|
codePointCount - matchedCodePointCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
||||||
return setPtNodeProbability(&ptNodeParams, probability, timestamp, outAddedNewUnigram);
|
return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
|
||||||
|
timestamp, outAddedNewUnigram);
|
||||||
}
|
}
|
||||||
if (!ptNodeParams.hasChildren()) {
|
if (!ptNodeParams.hasChildren()) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability, timestamp,
|
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
||||||
|
isNotAWord, isBlacklisted, probability, timestamp,
|
||||||
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
||||||
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
||||||
}
|
}
|
||||||
|
@ -79,8 +82,8 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||||
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability,
|
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||||
timestamp, &pos);
|
isNotAWord, isBlacklisted, probability, timestamp, &pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
@ -103,19 +106,21 @@ bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, co
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||||
const int timestamp, int *const forwardLinkFieldPos) {
|
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||||
|
const int timestamp, int *const forwardLinkFieldPos) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
||||||
probability, timestamp);
|
isNotAWord, isBlacklisted, probability, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
||||||
const PtNodeParams *const originalPtNodeParams, const int probability, const int timestamp,
|
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||||
|
const bool isBlacklisted, const int probability, const int timestamp,
|
||||||
bool *const outAddedNewUnigram) {
|
bool *const outAddedNewUnigram) {
|
||||||
if (originalPtNodeParams->isTerminal()) {
|
if (originalPtNodeParams->isTerminal()) {
|
||||||
// Overwrites the probability.
|
// Overwrites the probability.
|
||||||
|
@ -127,9 +132,9 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
||||||
const int movedPos = mBuffer->getTailPosition();
|
const int movedPos = mBuffer->getTailPosition();
|
||||||
int writingPos = movedPos;
|
int writingPos = movedPos;
|
||||||
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||||
true /* isTerminal */, originalPtNodeParams->getParentPos(),
|
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||||
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
|
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
||||||
probability));
|
originalPtNodeParams->getCodePoints(), probability));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
timestamp, &writingPos)) {
|
timestamp, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -142,25 +147,28 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
const PtNodeParams *const parentPtNodeParams, const int probability, const int timestamp,
|
const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
|
||||||
|
const bool isBlacklisted, const int probability, const int timestamp,
|
||||||
const int *const codePoints, const int codePointCount) {
|
const int *const codePoints, const int codePointCount) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||||
codePointCount, probability, timestamp);
|
codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||||
const int probability, const int timestamp) {
|
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||||
|
const int timestamp) {
|
||||||
int writingPos = mBuffer->getTailPosition();
|
int writingPos = mBuffer->getTailPosition();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||||
1 /* arraySize */, &writingPos)) {
|
1 /* arraySize */, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
|
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||||
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
@ -176,8 +184,8 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
// Returns whether the dictionary updating was succeeded or not.
|
// Returns whether the dictionary updating was succeeded or not.
|
||||||
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints,
|
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
|
||||||
const int newNodeCodePointCount) {
|
const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
|
||||||
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
||||||
// Reallocating PtNode: abcde, newNode: abcxy.
|
// Reallocating PtNode: abcde, newNode: abcxy.
|
||||||
// abc (1st, not terminal) __ de (2nd)
|
// abc (1st, not terminal) __ de (2nd)
|
||||||
|
@ -191,14 +199,16 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||||
// with actual children position.
|
// with actual children position.
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */,
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
|
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
|
||||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||||
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
|
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
|
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||||
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
|
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
|
@ -216,6 +226,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
// Write the 2nd part of the reallocating node.
|
// Write the 2nd part of the reallocating node.
|
||||||
const int secondPartOfReallocatedPtNodePos = writingPos;
|
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||||
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
||||||
|
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
|
||||||
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
|
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
|
||||||
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
||||||
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
||||||
|
@ -224,7 +235,8 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||||
|
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||||
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
||||||
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
||||||
|
@ -249,23 +261,25 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
|
|
||||||
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
|
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
|
||||||
const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos,
|
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||||
|
const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability) const {
|
const int codePointCount, const int *const codePoints, const int probability) const {
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal,
|
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
|
||||||
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
|
originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */,
|
||||||
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
||||||
probability);
|
probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
|
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
|
||||||
const bool isTerminal, const int parentPos, const int codePointCount,
|
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
||||||
const int *const codePoints, const int probability) const {
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int probability) const {
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
false /* isBlacklisted */, false /* isNotAWord */, isTerminal,
|
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
|
||||||
false /* hasShortcutTargets */, false /* hasBigrams */,
|
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
||||||
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,8 @@ class DynamicPatriciaTrieUpdatingHelper {
|
||||||
// Add a word to the dictionary. If the word already exists, update the probability.
|
// Add a word to the dictionary. If the word already exists, update the probability.
|
||||||
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
|
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
const int *const wordCodePoints, const int codePointCount, const int probability,
|
||||||
const int timestamp, bool *const outAddedNewUnigram);
|
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
|
||||||
|
bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
// Add a bigram relation from word0Pos to word1Pos.
|
// Add a bigram relation from word0Pos to word1Pos.
|
||||||
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
||||||
|
@ -61,29 +62,34 @@ class DynamicPatriciaTrieUpdatingHelper {
|
||||||
PtNodeWriter *const mPtNodeWriter;
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
|
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const int probability, const int timestamp,
|
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
|
||||||
int *const forwardLinkFieldPos);
|
const int probability, const int timestamp, int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
|
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||||
const int timestamp, bool *const outAddedNewUnigram);
|
const bool isBlacklisted, const int probability, const int timestamp,
|
||||||
|
bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
||||||
const int probability, const int timestamp, const int *const codePoints,
|
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||||
const int codePointCount);
|
const int timestamp, const int *const codePoints, const int codePointCount);
|
||||||
|
|
||||||
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const int probability, const int timestamp);
|
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
|
||||||
|
const int probability, const int timestamp);
|
||||||
|
|
||||||
bool reallocatePtNodeAndAddNewPtNodes(
|
bool reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
const int probabilityOfNewPtNode, const int timestamp,
|
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
|
||||||
const int *const newNodeCodePoints, const int newNodeCodePointCount);
|
const int timestamp, const int *const newNodeCodePoints,
|
||||||
|
const int newNodeCodePointCount);
|
||||||
|
|
||||||
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||||
const bool isTerminal, const int parentPos, const int codePointCount,
|
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
||||||
|
const int parentPos, const int codePointCount,
|
||||||
const int *const codePoints, const int probability) const;
|
const int *const codePoints, const int probability) const;
|
||||||
|
|
||||||
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos,
|
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
|
||||||
|
const bool isTerminal, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability) const;
|
const int codePointCount, const int *const codePoints, const int probability) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -137,7 +137,9 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||||
const int probability, const int timestamp) {
|
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||||
|
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||||
|
const int timestamp) {
|
||||||
if (!mBuffers.get()->isUpdatable()) {
|
if (!mBuffers.get()->isUpdatable()) {
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -150,8 +152,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
|
// TODO: Add shortcut.
|
||||||
&addedNewUnigram)) {
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
|
||||||
|
isBlacklisted, timestamp, &addedNewUnigram)) {
|
||||||
if (addedNewUnigram) {
|
if (addedNewUnigram) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||||
|
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||||
|
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||||
const int timestamp);
|
const int timestamp);
|
||||||
|
|
||||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
|
|
Loading…
Reference in a new issue