am 2b02a680: Merge "Use CodePointArrayView in Dictionary."
* commit '2b02a6805b3c7f192e3cd20d8ef4d038fbf97f57': Use CodePointArrayView in Dictionary.
main
commit
ba55981f3e
|
@@ -259,20 +259,21 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
|
||||||
jintArray word) {
|
jintArray word) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) return NOT_A_PROBABILITY;
|
if (!dictionary) return NOT_A_PROBABILITY;
|
||||||
const jsize wordLength = env->GetArrayLength(word);
|
const jsize codePointCount = env->GetArrayLength(word);
|
||||||
int codePoints[wordLength];
|
int codePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||||
return dictionary->getProbability(codePoints, wordLength);
|
return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
|
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
|
||||||
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
|
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) return NOT_A_PROBABILITY;
|
if (!dictionary) return NOT_A_PROBABILITY;
|
||||||
const jsize wordLength = env->GetArrayLength(word);
|
const jsize codePointCount = env->GetArrayLength(word);
|
||||||
int codePoints[wordLength];
|
int codePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||||
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
|
return dictionary->getMaxProbabilityOfExactMatches(
|
||||||
|
CodePointArrayView(codePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
|
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
|
||||||
|
@@ -285,7 +286,8 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||||
return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength);
|
return dictionary->getNgramProbability(&prevWordsInfo,
|
||||||
|
CodePointArrayView(wordCodePoints, wordLength));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Method to iterate all words in the dictionary for makedict.
|
// Method to iterate all words in the dictionary for makedict.
|
||||||
|
@@ -340,7 +342,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
|
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||||
|
CodePointArrayView(wordCodePoints, codePointCount));
|
||||||
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
|
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
|
||||||
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
|
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
|
||||||
outShortcutProbabilities);
|
outShortcutProbabilities);
|
||||||
|
@@ -366,7 +369,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
|
||||||
// Use 1 for count to indicate the word has been input.
|
// Use 1 for count to indicate the word has been input.
|
||||||
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
||||||
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
||||||
return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
|
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
|
||||||
|
&unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
|
@@ -378,7 +382,7 @@ static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass cla
|
||||||
jsize codePointCount = env->GetArrayLength(word);
|
jsize codePointCount = env->GetArrayLength(word);
|
||||||
int codePoints[codePointCount];
|
int codePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||||
return dictionary->removeUnigramEntry(codePoints, codePointCount);
|
return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
|
@@ -410,10 +414,11 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
|
||||||
}
|
}
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||||
jsize wordLength = env->GetArrayLength(word);
|
jsize codePointCount = env->GetArrayLength(word);
|
||||||
int wordCodePoints[wordLength];
|
int wordCodePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
|
||||||
return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
|
return dictionary->removeNgramEntry(&prevWordsInfo,
|
||||||
|
CodePointArrayView(wordCodePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns how many language model params are processed.
|
// Returns how many language model params are processed.
|
||||||
|
@@ -484,7 +489,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||||
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
|
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
|
||||||
&shortcuts);
|
&shortcuts);
|
||||||
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
|
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
|
||||||
|
&unigramProperty);
|
||||||
if (word0) {
|
if (word0) {
|
||||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||||
const std::vector<int> bigramTargetCodePoints(
|
const std::vector<int> bigramTargetCodePoints(
|
||||||
|
@@ -568,8 +574,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
||||||
// Add unigrams.
|
// Add unigrams.
|
||||||
do {
|
do {
|
||||||
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
|
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
|
||||||
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
|
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||||
wordCodePointCount);
|
CodePointArrayView(wordCodePoints, wordCodePointCount));
|
||||||
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
|
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
|
||||||
// Skip beginning-of-sentence unigram.
|
// Skip beginning-of-sentence unigram.
|
||||||
continue;
|
continue;
|
||||||
|
@@ -593,8 +599,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
||||||
// Add bigrams.
|
// Add bigrams.
|
||||||
do {
|
do {
|
||||||
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
|
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
|
||||||
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
|
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||||
wordCodePointCount);
|
CodePointArrayView(wordCodePoints, wordCodePointCount));
|
||||||
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
|
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
|
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
|
||||||
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
|
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
|
||||||
|
|
|
@@ -102,21 +102,21 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
|
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
|
||||||
return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
|
return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
|
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return DictionaryUtils::getMaxProbabilityOfExactMatches(
|
return DictionaryUtils::getMaxProbabilityOfExactMatches(
|
||||||
mDictionaryStructureWithBufferPolicy.get(), word, length);
|
mDictionaryStructureWithBufferPolicy.get(), codePoints.data(), codePoints.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||||
int length) const {
|
const CodePointArrayView codePoints) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
int wordId = mDictionaryStructureWithBufferPolicy->getWordId(
|
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
|
||||||
CodePointArrayView(word, length), false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
|
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
|
||||||
if (!prevWordsInfo) {
|
if (!prevWordsInfo) {
|
||||||
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
|
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
|
||||||
|
@@ -128,7 +128,7 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
|
||||||
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
|
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
if (unigramProperty->representsBeginningOfSentence()
|
if (unigramProperty->representsBeginningOfSentence()
|
||||||
&& !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
|
&& !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
|
||||||
|
@@ -137,14 +137,12 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length),
|
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
|
||||||
unigramProperty);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
|
bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(
|
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
|
||||||
CodePointArrayView(codePoints, codePointCount));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
@@ -154,10 +152,9 @@ bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *const word, const int length) {
|
const CodePointArrayView codePoints) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo,
|
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
|
||||||
CodePointArrayView(word, length));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::flush(const char *const filePath) {
|
bool Dictionary::flush(const char *const filePath) {
|
||||||
|
@@ -182,11 +179,9 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
|
||||||
maxResultLength);
|
maxResultLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
const WordProperty Dictionary::getWordProperty(const int *const codePoints,
|
const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
|
||||||
const int codePointCount) {
|
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->getWordProperty(
|
return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
|
||||||
CodePointArrayView(codePoints, codePointCount));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||||
|
|
|
@@ -72,23 +72,23 @@ class Dictionary {
|
||||||
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||||
SuggestionResults *const outSuggestionResults) const;
|
SuggestionResults *const outSuggestionResults) const;
|
||||||
|
|
||||||
int getProbability(const int *word, int length) const;
|
int getProbability(const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
int getMaxProbabilityOfExactMatches(const int *word, int length) const;
|
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *word, int length) const;
|
const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
bool addUnigramEntry(const int *const codePoints, const int codePointCount,
|
bool addUnigramEntry(const CodePointArrayView codePoints,
|
||||||
const UnigramProperty *const unigramProperty);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
bool removeUnigramEntry(const int *const codePoints, const int codePointCount);
|
bool removeUnigramEntry(const CodePointArrayView codePoints);
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const BigramProperty *const bigramProperty);
|
const BigramProperty *const bigramProperty);
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
|
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int length);
|
const CodePointArrayView codePoints);
|
||||||
|
|
||||||
bool flush(const char *const filePath);
|
bool flush(const char *const filePath);
|
||||||
|
|
||||||
|
@@ -99,7 +99,7 @@ class Dictionary {
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength);
|
const int maxResultLength);
|
||||||
|
|
||||||
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
|
const WordProperty getWordProperty(const CodePointArrayView codePoints);
|
||||||
|
|
||||||
// Method to iterate all words in the dictionary.
|
// Method to iterate all words in the dictionary.
|
||||||
// The returned token has to be used to get the next word. If token is 0, this method newly
|
// The returned token has to be used to get the next word. If token is 0, this method newly
|
||||||
|
|
Loading…
Reference in New Issue