Merge "Support controlling some unigram flags in addUnigramWord."

This commit is contained in:
Keisuke Kuroyanagi 2013-12-04 11:43:46 +00:00 committed by Android (Google) Code Review
commit 5b0ca975c4
9 changed files with 108 additions and 53 deletions

View file

@ -301,11 +301,12 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
int shortcutTargetCodePoints[wordLength];
int shortcutTargetCodePoints[shortcutLength];
if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
}
dictionary->addUnigramWord(codePoints, wordLength, probability, timestamp);
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@ -356,7 +357,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
env->DeleteLocalRef(languageModelParam);
// TODO: Support shortcut and flags.
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
jfieldID unigramProbabilityFieldId =
@ -365,6 +365,14 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
jfieldID timestampFieldId =
env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
jfieldID shortcutTargetFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
jfieldID shortcutProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
jfieldID isBlacklistedFieldId =
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) {
@ -386,7 +394,19 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, timestamp);
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
int shortcutTargetCodePoints[shortcutLength];
if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
}
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
isNotAWord, isBlacklisted, timestamp);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
@ -397,6 +417,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
}
env->DeleteLocalRef(word0);
env->DeleteLocalRef(word1);
env->DeleteLocalRef(shortcutTarget);
env->DeleteLocalRef(languageModelParam);
}
return languageModelParamCount;

View file

@ -99,10 +99,13 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
TimeKeeper::setCurrentTime();
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
timestamp);
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
isBlacklisted, timestamp);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -73,6 +73,8 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp);
void addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -67,7 +67,9 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update succeeded.
virtual bool addUnigramWord(const int *const word, const int length,
const int probability, const int timestamp) = 0;
const int probability, const int *const shortcutTargetCodePoints,
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
const bool isBlacklisted,const int timestamp) = 0;
// Returns whether the update succeeded.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -77,6 +77,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
bool addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");

View file

@ -30,7 +30,8 @@ const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability,
const int timestamp, bool *const outAddedNewUnigram) {
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
@ -52,18 +53,20 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability, timestamp,
wordCodePoints + matchedCodePointCount,
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
probability, timestamp, wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
}
}
// All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
return setPtNodeProbability(&ptNodeParams, probability, timestamp, outAddedNewUnigram);
return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
timestamp, outAddedNewUnigram);
}
if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability, timestamp,
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
isNotAWord, isBlacklisted, probability, timestamp,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
}
@ -79,8 +82,8 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
*outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability,
timestamp, &pos);
codePointCount - readingHelper->getPrevTotalCodePointCount(),
isNotAWord, isBlacklisted, probability, timestamp, &pos);
}
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -103,19 +106,21 @@ bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, co
}
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
const int timestamp, int *const forwardLinkFieldPos) {
const int *const nodeCodePoints, const int nodeCodePointCount,
const bool isNotAWord, const bool isBlacklisted, const int probability,
const int timestamp, int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
probability, timestamp);
isNotAWord, isBlacklisted, probability, timestamp);
}
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
const PtNodeParams *const originalPtNodeParams, const int probability, const int timestamp,
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const int probability, const int timestamp,
bool *const outAddedNewUnigram) {
if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability.
@ -127,9 +132,9 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
probability));
isNotAWord, isBlacklisted, true /* isTerminal */,
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
originalPtNodeParams->getCodePoints(), probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
timestamp, &writingPos)) {
return false;
@ -142,25 +147,28 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
}
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const PtNodeParams *const parentPtNodeParams, const int probability, const int timestamp,
const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const int probability, const int timestamp,
const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, probability, timestamp);
codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
}
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
const int probability, const int timestamp) {
const bool isNotAWord, const bool isBlacklisted, const int probability,
const int timestamp) {
int writingPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
1 /* arraySize */, &writingPos)) {
return false;
}
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
&writingPos)) {
@ -176,8 +184,8 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary update succeeded.
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints,
const int newNodeCodePointCount) {
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy.
// abc (1st, not terminal) __ de (2nd)
@ -191,14 +199,16 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */,
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
return false;
}
} else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@ -216,6 +226,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
@ -224,7 +235,8 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
return false;
}
if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */,
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */,
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
@ -249,23 +261,25 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos,
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal,
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
probability);
}
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
const bool isTerminal, const int parentPos, const int codePointCount,
const int *const codePoints, const int probability) const {
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
const int parentPos, const int codePointCount, const int *const codePoints,
const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
false /* isBlacklisted */, false /* isNotAWord */, isTerminal,
false /* hasShortcutTargets */, false /* hasBigrams */,
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
}

View file

@ -42,7 +42,8 @@ class DynamicPatriciaTrieUpdatingHelper {
// Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability,
const int timestamp, bool *const outAddedNewUnigram);
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos.
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
@ -61,29 +62,34 @@ class DynamicPatriciaTrieUpdatingHelper {
PtNodeWriter *const mPtNodeWriter;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, const int timestamp,
int *const forwardLinkFieldPos);
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
const int probability, const int timestamp, int *const forwardLinkFieldPos);
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
const int timestamp, bool *const outAddedNewUnigram);
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const int probability, const int timestamp,
bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
const int probability, const int timestamp, const int *const codePoints,
const int codePointCount);
const bool isNotAWord, const bool isBlacklisted, const int probability,
const int timestamp, const int *const codePoints, const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, const int timestamp);
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
const int probability, const int timestamp);
bool reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int timestamp,
const int *const newNodeCodePoints, const int newNodeCodePointCount);
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
const int timestamp, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const bool isTerminal, const int parentPos, const int codePointCount,
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
const int parentPos, const int codePointCount,
const int *const codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos,
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const;
};
} // namespace latinime

View file

@ -137,7 +137,9 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
}
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability, const int timestamp) {
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
if (!mBuffers.get()->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
@ -150,8 +152,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
&addedNewUnigram)) {
// TODO: Add shortcut.
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
isBlacklisted, timestamp, &addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
}

View file

@ -88,6 +88,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
bool addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp);
bool addBigramWords(const int *const word0, const int length0, const int *const word1,