Merge "Support bigram historical information migration."
This commit is contained in:
commit
61fc329901
15 changed files with 71 additions and 56 deletions
|
@ -335,7 +335,7 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
|
||||||
if (!shortcutTargetCodePoints.empty()) {
|
if (!shortcutTargetCodePoints.empty()) {
|
||||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||||
}
|
}
|
||||||
// Use 1 for count to indicate the word has inputed.
|
// Use 1 for count to indicate the word has been input.
|
||||||
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
||||||
probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
||||||
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
|
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
|
||||||
|
@ -353,8 +353,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
|
||||||
jsize word1Length = env->GetArrayLength(word1);
|
jsize word1Length = env->GetArrayLength(word1);
|
||||||
int word1CodePoints[word1Length];
|
int word1CodePoints[word1Length];
|
||||||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||||
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
|
const std::vector<int> bigramTargetCodePoints(
|
||||||
word1Length, probability, timestamp);
|
word1CodePoints, word1CodePoints + word1Length);
|
||||||
|
// Use 1 for count to indicate the bigram has been input.
|
||||||
|
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
|
||||||
|
timestamp, 0 /* level */, 1 /* count */);
|
||||||
|
dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
|
@ -437,14 +441,18 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||||
}
|
}
|
||||||
// Use 1 for count to indicate the word has inputed.
|
// Use 1 for count to indicate the word has been input.
|
||||||
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
||||||
unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
||||||
dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
|
dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
|
||||||
if (word0) {
|
if (word0) {
|
||||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||||
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
const std::vector<int> bigramTargetCodePoints(
|
||||||
bigramProbability, timestamp);
|
word1CodePoints, word1CodePoints + word1Length);
|
||||||
|
// Use 1 for count to indicate the bigram has been input.
|
||||||
|
const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
|
||||||
|
timestamp, 0 /* level */, 1 /* count */);
|
||||||
|
dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty);
|
||||||
}
|
}
|
||||||
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
|
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
return i + 1;
|
return i + 1;
|
||||||
|
@ -558,11 +566,9 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const BigramProperty &bigarmProperty : *wordProperty.getBigramProperties()) {
|
for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
|
||||||
const std::vector<int> *targetCodePoints = bigarmProperty.getTargetCodePoints();
|
|
||||||
if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength,
|
if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength,
|
||||||
targetCodePoints->data(), targetCodePoints->size(),
|
&bigramProperty)) {
|
||||||
bigarmProperty.getProbability(), bigarmProperty.getTimestamp())) {
|
|
||||||
LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
|
LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,11 +88,10 @@ void Dictionary::addUnigramWord(const int *const word, const int length,
|
||||||
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
|
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
void Dictionary::addBigramWords(const int *const word0, const int length0,
|
||||||
const int length1, const int probability, const int timestamp) {
|
const BigramProperty *const bigramProperty) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
|
mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty);
|
||||||
probability, timestamp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::removeBigramWords(const int *const word0, const int length0,
|
void Dictionary::removeBigramWords(const int *const word0, const int length0,
|
||||||
|
|
|
@ -76,8 +76,8 @@ class Dictionary {
|
||||||
void addUnigramWord(const int *const codePoints, const int codePointCount,
|
void addUnigramWord(const int *const codePoints, const int codePointCount,
|
||||||
const UnigramProperty *const unigramProperty);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
void addBigramWords(const int *const word0, const int length0,
|
||||||
const int length1, const int probability, const int timestamp);
|
const BigramProperty *const bigramProperty);
|
||||||
|
|
||||||
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
const int length1);
|
const int length1);
|
||||||
|
|
|
@ -73,8 +73,8 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
const UnigramProperty *const unigramProperty) = 0;
|
const UnigramProperty *const unigramProperty) = 0;
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
virtual bool addBigramWords(const int *const word0, const int length0,
|
||||||
const int length1, const int probability, const int timestamp) = 0;
|
const BigramProperty *const bigramProperty) = 0;
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool removeBigramWords(const int *const word0, const int length0,
|
virtual bool removeBigramWords(const int *const word0, const int length0,
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
|
@ -49,13 +50,12 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||||
const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
|
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
|
||||||
// 1. The word has no bigrams yet.
|
// 1. The word has no bigrams yet.
|
||||||
// 2. The word has bigrams, and there is the target in the list.
|
// 2. The word has bigrams, and there is the target in the list.
|
||||||
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
|
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
|
||||||
// 4. The word has bigrams. We have to append new bigram entry to the list.
|
// 4. The word has bigrams. We have to append new bigram entry to the list.
|
||||||
// 5. Same as 4, but the list is the last entry of the content file.
|
// 5. Same as 4, but the list is the last entry of the content file.
|
||||||
|
|
||||||
if (outAddedNewEntry) {
|
if (outAddedNewEntry) {
|
||||||
*outAddedNewEntry = false;
|
*outAddedNewEntry = false;
|
||||||
}
|
}
|
||||||
|
@ -69,7 +69,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||||
newTargetTerminalId);
|
newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
||||||
newProbability, timestamp);
|
bigramProperty);
|
||||||
// Write an entry.
|
// Write an entry.
|
||||||
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
|
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
|
||||||
|
@ -102,7 +102,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||||
newTargetTerminalId);
|
newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&newBigramEntry, newProbability, timestamp);
|
&newBigramEntry, bigramProperty);
|
||||||
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -128,7 +128,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
const BigramEntry updatedBigramEntry =
|
const BigramEntry updatedBigramEntry =
|
||||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&updatedBigramEntry, newProbability, timestamp);
|
&updatedBigramEntry, bigramProperty);
|
||||||
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -253,19 +253,19 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
}
|
}
|
||||||
|
|
||||||
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
||||||
const BigramEntry *const originalBigramEntry, const int newProbability,
|
const BigramEntry *const originalBigramEntry,
|
||||||
const int timestamp) const {
|
const BigramProperty *const bigramProperty) const {
|
||||||
// TODO: Consolidate historical info and probability.
|
// TODO: Consolidate historical info and probability.
|
||||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
// Use 1 for count to indicate the bigram has inputed.
|
const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
|
||||||
const HistoricalInfo historicalInfoForUpdate(timestamp, 0 /* level */, 1 /* count */);
|
bigramProperty->getLevel(), bigramProperty->getCount());
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalBigramEntry->getHistoricalInfo(), newProbability,
|
originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
|
||||||
&historicalInfoForUpdate, mHeaderPolicy);
|
&historicalInfoForUpdate, mHeaderPolicy);
|
||||||
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
||||||
} else {
|
} else {
|
||||||
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BigramDictContent;
|
class BigramDictContent;
|
||||||
|
class BigramProperty;
|
||||||
class HeaderPolicy;
|
class HeaderPolicy;
|
||||||
class TerminalPositionLookupTable;
|
class TerminalPositionLookupTable;
|
||||||
|
|
||||||
|
@ -43,8 +44,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
|
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||||
const int timestamp, bool *const outAddedNewEntry);
|
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
|
||||||
|
|
||||||
bool removeEntry(const int terminalId, const int targetTerminalId);
|
bool removeEntry(const int terminalId, const int targetTerminalId);
|
||||||
|
|
||||||
|
@ -60,7 +61,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
int *const outTailEntryPos) const;
|
int *const outTailEntryPos) const;
|
||||||
|
|
||||||
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
||||||
const int newProbability, const int timestamp) const;
|
const BigramProperty *const bigramProperty) const;
|
||||||
|
|
||||||
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
|
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
|
||||||
|
|
||||||
|
|
|
@ -85,13 +85,13 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
const int probability, const int timestamp, bool *const outAddedNewBigram) {
|
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
|
||||||
const PtNodeParams sourcePtNodeParams(
|
const PtNodeParams sourcePtNodeParams(
|
||||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
const PtNodeParams targetPtNodeParams(
|
const PtNodeParams targetPtNodeParams(
|
||||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
|
||||||
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
|
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
|
||||||
timestamp, outAddedNewBigram);
|
bigramProperty, outAddedNewBigram);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class BigramProperty;
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
class DynamicPtReadingHelper;
|
class DynamicPtReadingHelper;
|
||||||
class PtNodeReader;
|
class PtNodeReader;
|
||||||
|
@ -42,8 +43,8 @@ class DynamicPtUpdatingHelper {
|
||||||
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
// Add a bigram relation from word0Pos to word1Pos.
|
// Add a bigram relation from word0Pos to word1Pos.
|
||||||
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
bool addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
const int timestamp, bool *const outAddedNewBigram);
|
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
|
||||||
|
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class BigramProperty;
|
||||||
class UnigramProperty;
|
class UnigramProperty;
|
||||||
|
|
||||||
// Interface class used to write PtNode information.
|
// Interface class used to write PtNode information.
|
||||||
|
@ -70,7 +71,7 @@ class PtNodeWriter {
|
||||||
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
|
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
|
||||||
|
|
||||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
|
||||||
bool *const outAddedNewBigram) = 0;
|
bool *const outAddedNewBigram) = 0;
|
||||||
|
|
||||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
|
|
@ -88,8 +88,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool addBigramWords(const int *const word0, const int length0,
|
||||||
const int length1, const int probability, const int timestamp) {
|
const BigramProperty *const bigramProperty) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -223,11 +223,10 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||||
const PtNodeParams *const sourcePtNodeParams,
|
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
|
||||||
bool *const outAddedNewBigram) {
|
|
||||||
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
|
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
|
||||||
targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) {
|
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
|
||||||
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
|
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
|
||||||
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
|
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -76,7 +76,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
|
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
|
||||||
|
|
||||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
|
||||||
bool *const outAddedNewBigram);
|
bool *const outAddedNewBigram);
|
||||||
|
|
||||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
|
|
@ -209,8 +209,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
||||||
const int *const word1, const int length1, const int probability,
|
const BigramProperty *const bigramProperty) {
|
||||||
const int timestamp) {
|
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -220,9 +219,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
|
if (length0 > MAX_WORD_LENGTH
|
||||||
|
|| bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||||
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
|
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
|
||||||
"length0: %d, length1: %d", length0, length1);
|
"length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
|
@ -230,14 +230,14 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
const int word1Pos = getTerminalPtNodePositionOfWord(
|
||||||
false /* forceLowerCaseSearch */);
|
bigramProperty->getTargetCodePoints()->data(),
|
||||||
|
bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
bool addedNewBigram = false;
|
bool addedNewBigram = false;
|
||||||
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
|
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) {
|
||||||
&addedNewBigram)) {
|
|
||||||
if (addedNewBigram) {
|
if (addedNewBigram) {
|
||||||
mBigramCount++;
|
mBigramCount++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,8 +93,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
bool addUnigramWord(const int *const word, const int length,
|
bool addUnigramWord(const int *const word, const int length,
|
||||||
const UnigramProperty *const unigramProperty);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool addBigramWords(const int *const word0, const int length0,
|
||||||
const int length1, const int probability, const int timestamp);
|
const BigramProperty *const bigramProperty);
|
||||||
|
|
||||||
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
const int length1);
|
const int length1);
|
||||||
|
|
|
@ -580,7 +580,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
// TODO: Add tests for bigrams when the implementation gets ready.
|
|
||||||
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
|
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
|
||||||
assertTrue(binaryDictionary.isValidWord("aaa"));
|
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||||
addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
|
addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
@ -590,6 +589,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
||||||
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
||||||
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
||||||
|
addUnigramWord(binaryDictionary, "abc", DUMMY_PROBABILITY);
|
||||||
|
addBigramWords(binaryDictionary, "aaa", "abc", DUMMY_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("aaa", "abc"));
|
||||||
|
addBigramWords(binaryDictionary, "aaa", "bbb", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("aaa", "bbb"));
|
||||||
|
|
||||||
assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
|
assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
|
||||||
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
|
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
|
||||||
|
@ -600,6 +604,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc"));
|
assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc"));
|
||||||
addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
|
addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
|
||||||
assertTrue(binaryDictionary.isValidWord("bbb"));
|
assertTrue(binaryDictionary.isValidWord("bbb"));
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("aaa", "abc"));
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("aaa", "bbb"));
|
||||||
|
addBigramWords(binaryDictionary, "aaa", "bbb", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue