am 5b0ca975: Merge "Support controlling some unigram flags in addUnigramWord."
* commit '5b0ca975c44204a3a2ef2c2e95feef8c48313f88': Support controlling some unigram flags in addUnigramWord.
This commit is contained in:
commit
1afc3a2a6e
9 changed files with 108 additions and 53 deletions
|
@ -301,11 +301,12 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
|
|||
int codePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
||||
int shortcutTargetCodePoints[wordLength];
|
||||
int shortcutTargetCodePoints[shortcutLength];
|
||||
if (shortcutTarget) {
|
||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
||||
}
|
||||
dictionary->addUnigramWord(codePoints, wordLength, probability, timestamp);
|
||||
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
|
||||
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
|
||||
}
|
||||
|
||||
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
||||
|
@ -356,7 +357,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
|
||||
env->DeleteLocalRef(languageModelParam);
|
||||
|
||||
// TODO: Support shortcut and flags.
|
||||
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
|
||||
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
|
||||
jfieldID unigramProbabilityFieldId =
|
||||
|
@ -365,6 +365,14 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
|
||||
jfieldID timestampFieldId =
|
||||
env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
|
||||
jfieldID shortcutTargetFieldId =
|
||||
env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
|
||||
jfieldID shortcutProbabilityFieldId =
|
||||
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
|
||||
jfieldID isNotAWordFieldId =
|
||||
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
|
||||
jfieldID isBlacklistedFieldId =
|
||||
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
|
||||
env->DeleteLocalRef(languageModelParamClass);
|
||||
|
||||
for (int i = startIndex; i < languageModelParamCount; ++i) {
|
||||
|
@ -386,7 +394,19 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
|
||||
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
|
||||
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, timestamp);
|
||||
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
|
||||
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
|
||||
jintArray shortcutTarget = static_cast<jintArray>(
|
||||
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
|
||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
||||
int shortcutTargetCodePoints[shortcutLength];
|
||||
if (shortcutTarget) {
|
||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
||||
}
|
||||
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
|
||||
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
|
||||
isNotAWord, isBlacklisted, timestamp);
|
||||
if (word0) {
|
||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
||||
|
@ -397,6 +417,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
}
|
||||
env->DeleteLocalRef(word0);
|
||||
env->DeleteLocalRef(word1);
|
||||
env->DeleteLocalRef(shortcutTarget);
|
||||
env->DeleteLocalRef(languageModelParam);
|
||||
}
|
||||
return languageModelParamCount;
|
||||
|
|
|
@ -99,10 +99,13 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
|
|||
}
|
||||
|
||||
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
|
||||
timestamp);
|
||||
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
|
||||
isBlacklisted, timestamp);
|
||||
}
|
||||
|
||||
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
|
|
|
@ -73,6 +73,8 @@ class Dictionary {
|
|||
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
||||
|
||||
void addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp);
|
||||
|
||||
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
|
|
|
@ -67,7 +67,9 @@ class DictionaryStructureWithBufferPolicy {
|
|||
|
||||
// Returns whether the update was successful or not.
|
||||
virtual bool addUnigramWord(const int *const word, const int length,
|
||||
const int probability, const int timestamp) = 0;
|
||||
const int probability, const int *const shortcutTargetCodePoints,
|
||||
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
|
||||
const bool isBlacklisted,const int timestamp) = 0;
|
||||
|
||||
// Returns whether the update was successful or not.
|
||||
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
|
|
|
@ -77,6 +77,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
|
|
|
@ -30,7 +30,8 @@ const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
|||
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
||||
const int timestamp, bool *const outAddedNewUnigram) {
|
||||
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
|
||||
bool *const outAddedNewUnigram) {
|
||||
int parentPos = NOT_A_DICT_POS;
|
||||
while (!readingHelper->isEnd()) {
|
||||
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
||||
|
@ -52,18 +53,20 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
|||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||
wordCodePoints[matchedCodePointCount + j])) {
|
||||
*outAddedNewUnigram = true;
|
||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability, timestamp,
|
||||
wordCodePoints + matchedCodePointCount,
|
||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
|
||||
probability, timestamp, wordCodePoints + matchedCodePointCount,
|
||||
codePointCount - matchedCodePointCount);
|
||||
}
|
||||
}
|
||||
// All characters are matched.
|
||||
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
||||
return setPtNodeProbability(&ptNodeParams, probability, timestamp, outAddedNewUnigram);
|
||||
return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
|
||||
timestamp, outAddedNewUnigram);
|
||||
}
|
||||
if (!ptNodeParams.hasChildren()) {
|
||||
*outAddedNewUnigram = true;
|
||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability, timestamp,
|
||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
||||
isNotAWord, isBlacklisted, probability, timestamp,
|
||||
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
||||
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
||||
}
|
||||
|
@ -79,8 +82,8 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
|||
*outAddedNewUnigram = true;
|
||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability,
|
||||
timestamp, &pos);
|
||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||
isNotAWord, isBlacklisted, probability, timestamp, &pos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||
|
@ -103,19 +106,21 @@ bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, co
|
|||
}
|
||||
|
||||
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
||||
const int timestamp, int *const forwardLinkFieldPos) {
|
||||
const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||
const int timestamp, int *const forwardLinkFieldPos) {
|
||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
||||
probability, timestamp);
|
||||
isNotAWord, isBlacklisted, probability, timestamp);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
||||
const PtNodeParams *const originalPtNodeParams, const int probability, const int timestamp,
|
||||
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||
const bool isBlacklisted, const int probability, const int timestamp,
|
||||
bool *const outAddedNewUnigram) {
|
||||
if (originalPtNodeParams->isTerminal()) {
|
||||
// Overwrites the probability.
|
||||
|
@ -127,9 +132,9 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
|||
const int movedPos = mBuffer->getTailPosition();
|
||||
int writingPos = movedPos;
|
||||
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||
true /* isTerminal */, originalPtNodeParams->getParentPos(),
|
||||
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
|
||||
probability));
|
||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
||||
originalPtNodeParams->getCodePoints(), probability));
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||
timestamp, &writingPos)) {
|
||||
return false;
|
||||
|
@ -142,25 +147,28 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
|||
}
|
||||
|
||||
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||
const PtNodeParams *const parentPtNodeParams, const int probability, const int timestamp,
|
||||
const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
|
||||
const bool isBlacklisted, const int probability, const int timestamp,
|
||||
const int *const codePoints, const int codePointCount) {
|
||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
||||
return false;
|
||||
}
|
||||
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||
codePointCount, probability, timestamp);
|
||||
codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||
const int probability, const int timestamp) {
|
||||
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||
const int timestamp) {
|
||||
int writingPos = mBuffer->getTailPosition();
|
||||
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||
1 /* arraySize */, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
|
||||
&writingPos)) {
|
||||
|
@ -176,8 +184,8 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
|||
// Returns whether the dictionary update succeeded or not.
|
||||
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||
const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints,
|
||||
const int newNodeCodePointCount) {
|
||||
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
|
||||
const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
|
||||
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
||||
// Reallocating PtNode: abcde, newNode: abcxy.
|
||||
// abc (1st, not terminal) __ de (2nd)
|
||||
|
@ -191,14 +199,16 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||
// with actual children position.
|
||||
if (addsExtraChild) {
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */,
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
|
||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
|
||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||
|
@ -216,6 +226,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
// Write the 2nd part of the reallocating node.
|
||||
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
||||
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
|
||||
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
|
||||
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
||||
|
@ -224,7 +235,8 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
return false;
|
||||
}
|
||||
if (addsExtraChild) {
|
||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
||||
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
||||
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
||||
|
@ -249,23 +261,25 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
}
|
||||
|
||||
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
|
||||
const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos,
|
||||
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||
const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
||||
const int codePointCount, const int *const codePoints, const int probability) const {
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal,
|
||||
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
|
||||
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
|
||||
originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
||||
probability);
|
||||
}
|
||||
|
||||
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
|
||||
const bool isTerminal, const int parentPos, const int codePointCount,
|
||||
const int *const codePoints, const int probability) const {
|
||||
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||
const int probability) const {
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||
false /* isBlacklisted */, false /* isNotAWord */, isTerminal,
|
||||
false /* hasShortcutTargets */, false /* hasBigrams */,
|
||||
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
|
||||
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
||||
}
|
||||
|
||||
|
|
|
@ -42,7 +42,8 @@ class DynamicPatriciaTrieUpdatingHelper {
|
|||
// Add a word to the dictionary. If the word already exists, update the probability.
|
||||
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
||||
const int timestamp, bool *const outAddedNewUnigram);
|
||||
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
|
||||
bool *const outAddedNewUnigram);
|
||||
|
||||
// Add a bigram relation from word0Pos to word1Pos.
|
||||
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
||||
|
@ -61,29 +62,34 @@ class DynamicPatriciaTrieUpdatingHelper {
|
|||
PtNodeWriter *const mPtNodeWriter;
|
||||
|
||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||
const int nodeCodePointCount, const int probability, const int timestamp,
|
||||
int *const forwardLinkFieldPos);
|
||||
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int probability, const int timestamp, int *const forwardLinkFieldPos);
|
||||
|
||||
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
|
||||
const int timestamp, bool *const outAddedNewUnigram);
|
||||
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||
const bool isBlacklisted, const int probability, const int timestamp,
|
||||
bool *const outAddedNewUnigram);
|
||||
|
||||
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
||||
const int probability, const int timestamp, const int *const codePoints,
|
||||
const int codePointCount);
|
||||
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||
const int timestamp, const int *const codePoints, const int codePointCount);
|
||||
|
||||
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
||||
const int nodeCodePointCount, const int probability, const int timestamp);
|
||||
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int probability, const int timestamp);
|
||||
|
||||
bool reallocatePtNodeAndAddNewPtNodes(
|
||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||
const int probabilityOfNewPtNode, const int timestamp,
|
||||
const int *const newNodeCodePoints, const int newNodeCodePointCount);
|
||||
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
|
||||
const int timestamp, const int *const newNodeCodePoints,
|
||||
const int newNodeCodePointCount);
|
||||
|
||||
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||
const bool isTerminal, const int parentPos, const int codePointCount,
|
||||
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
||||
const int parentPos, const int codePointCount,
|
||||
const int *const codePoints, const int probability) const;
|
||||
|
||||
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos,
|
||||
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
|
||||
const bool isTerminal, const int parentPos,
|
||||
const int codePointCount, const int *const codePoints, const int probability) const;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
|
|
@ -137,7 +137,9 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
|||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||
const int probability, const int timestamp) {
|
||||
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp) {
|
||||
if (!mBuffers.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
@ -150,8 +152,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
|||
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
bool addedNewUnigram = false;
|
||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
|
||||
&addedNewUnigram)) {
|
||||
// TODO: Add shortcut.
|
||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
|
||||
isBlacklisted, timestamp, &addedNewUnigram)) {
|
||||
if (addedNewUnigram) {
|
||||
mUnigramCount++;
|
||||
}
|
||||
|
|
|
@ -88,6 +88,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp);
|
||||
|
||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
|
|
Loading…
Reference in a new issue