Merge "Use CodePointArrayView in Dictionary."
This commit is contained in:
commit
2b02a6805b
3 changed files with 51 additions and 50 deletions
|
@ -259,20 +259,21 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
|
|||
jintArray word) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) return NOT_A_PROBABILITY;
|
||||
const jsize wordLength = env->GetArrayLength(word);
|
||||
int codePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
||||
return dictionary->getProbability(codePoints, wordLength);
|
||||
const jsize codePointCount = env->GetArrayLength(word);
|
||||
int codePoints[codePointCount];
|
||||
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||
return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
|
||||
}
|
||||
|
||||
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
|
||||
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) return NOT_A_PROBABILITY;
|
||||
const jsize wordLength = env->GetArrayLength(word);
|
||||
int codePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
||||
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
|
||||
const jsize codePointCount = env->GetArrayLength(word);
|
||||
int codePoints[codePointCount];
|
||||
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||
return dictionary->getMaxProbabilityOfExactMatches(
|
||||
CodePointArrayView(codePoints, codePointCount));
|
||||
}
|
||||
|
||||
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
|
||||
|
@ -285,7 +286,8 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
|
|||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength);
|
||||
return dictionary->getNgramProbability(&prevWordsInfo,
|
||||
CodePointArrayView(wordCodePoints, wordLength));
|
||||
}
|
||||
|
||||
// Method to iterate all words in the dictionary for makedict.
|
||||
|
@ -340,7 +342,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
|||
return;
|
||||
}
|
||||
}
|
||||
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
|
||||
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||
CodePointArrayView(wordCodePoints, codePointCount));
|
||||
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
|
||||
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
|
||||
outShortcutProbabilities);
|
||||
|
@ -366,7 +369,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
|
|||
// Use 1 for count to indicate the word has been input.
|
||||
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
||||
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
||||
return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
|
||||
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
|
||||
&unigramProperty);
|
||||
}
|
||||
|
||||
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||
|
@ -378,7 +382,7 @@ static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass cla
|
|||
jsize codePointCount = env->GetArrayLength(word);
|
||||
int codePoints[codePointCount];
|
||||
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||
return dictionary->removeUnigramEntry(codePoints, codePointCount);
|
||||
return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
|
||||
}
|
||||
|
||||
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||
|
@ -410,10 +414,11 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
|
|||
}
|
||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
jsize wordLength = env->GetArrayLength(word);
|
||||
int wordCodePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
|
||||
jsize codePointCount = env->GetArrayLength(word);
|
||||
int wordCodePoints[codePointCount];
|
||||
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
|
||||
return dictionary->removeNgramEntry(&prevWordsInfo,
|
||||
CodePointArrayView(wordCodePoints, codePointCount));
|
||||
}
|
||||
|
||||
// Returns how many language model params are processed.
|
||||
|
@ -484,7 +489,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
|
||||
&shortcuts);
|
||||
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
|
||||
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
|
||||
&unigramProperty);
|
||||
if (word0) {
|
||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||
const std::vector<int> bigramTargetCodePoints(
|
||||
|
@ -568,8 +574,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
|||
// Add unigrams.
|
||||
do {
|
||||
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
|
||||
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
|
||||
wordCodePointCount);
|
||||
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||
CodePointArrayView(wordCodePoints, wordCodePointCount));
|
||||
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
|
||||
// Skip beginning-of-sentence unigram.
|
||||
continue;
|
||||
|
@ -593,8 +599,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
|||
// Add bigrams.
|
||||
do {
|
||||
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
|
||||
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
|
||||
wordCodePointCount);
|
||||
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||
CodePointArrayView(wordCodePoints, wordCodePointCount));
|
||||
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
|
||||
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
|
||||
|
|
|
@ -102,21 +102,21 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
|||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
|
||||
}
|
||||
|
||||
int Dictionary::getProbability(const int *word, int length) const {
|
||||
return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
|
||||
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
|
||||
return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
|
||||
}
|
||||
|
||||
int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
|
||||
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
|
||||
TimeKeeper::setCurrentTime();
|
||||
return DictionaryUtils::getMaxProbabilityOfExactMatches(
|
||||
mDictionaryStructureWithBufferPolicy.get(), word, length);
|
||||
mDictionaryStructureWithBufferPolicy.get(), codePoints.data(), codePoints.size());
|
||||
}
|
||||
|
||||
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
||||
int length) const {
|
||||
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||
const CodePointArrayView codePoints) const {
|
||||
TimeKeeper::setCurrentTime();
|
||||
int wordId = mDictionaryStructureWithBufferPolicy->getWordId(
|
||||
CodePointArrayView(word, length), false /* forceLowerCaseSearch */);
|
||||
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
|
||||
if (!prevWordsInfo) {
|
||||
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
|
||||
|
@ -128,7 +128,7 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
|
|||
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
|
||||
}
|
||||
|
||||
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||
bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
|
||||
const UnigramProperty *const unigramProperty) {
|
||||
if (unigramProperty->representsBeginningOfSentence()
|
||||
&& !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
|
||||
|
@ -137,14 +137,12 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
|||
return false;
|
||||
}
|
||||
TimeKeeper::setCurrentTime();
|
||||
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length),
|
||||
unigramProperty);
|
||||
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
|
||||
}
|
||||
|
||||
bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
|
||||
bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(
|
||||
CodePointArrayView(codePoints, codePointCount));
|
||||
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
|
||||
}
|
||||
|
||||
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||
|
@ -154,10 +152,9 @@ bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
|||
}
|
||||
|
||||
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int *const word, const int length) {
|
||||
const CodePointArrayView codePoints) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo,
|
||||
CodePointArrayView(word, length));
|
||||
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
|
||||
}
|
||||
|
||||
bool Dictionary::flush(const char *const filePath) {
|
||||
|
@ -182,11 +179,9 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
|
|||
maxResultLength);
|
||||
}
|
||||
|
||||
const WordProperty Dictionary::getWordProperty(const int *const codePoints,
|
||||
const int codePointCount) {
|
||||
const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
return mDictionaryStructureWithBufferPolicy->getWordProperty(
|
||||
CodePointArrayView(codePoints, codePointCount));
|
||||
return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
|
||||
}
|
||||
|
||||
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||
|
|
|
@ -72,23 +72,23 @@ class Dictionary {
|
|||
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const;
|
||||
|
||||
int getProbability(const int *word, int length) const;
|
||||
int getProbability(const CodePointArrayView codePoints) const;
|
||||
|
||||
int getMaxProbabilityOfExactMatches(const int *word, int length) const;
|
||||
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
|
||||
|
||||
int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int *word, int length) const;
|
||||
const CodePointArrayView codePoints) const;
|
||||
|
||||
bool addUnigramEntry(const int *const codePoints, const int codePointCount,
|
||||
bool addUnigramEntry(const CodePointArrayView codePoints,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
||||
bool removeUnigramEntry(const int *const codePoints, const int codePointCount);
|
||||
bool removeUnigramEntry(const CodePointArrayView codePoints);
|
||||
|
||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||
const BigramProperty *const bigramProperty);
|
||||
|
||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
|
||||
const int length);
|
||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||
const CodePointArrayView codePoints);
|
||||
|
||||
bool flush(const char *const filePath);
|
||||
|
||||
|
@ -99,7 +99,7 @@ class Dictionary {
|
|||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||
const int maxResultLength);
|
||||
|
||||
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
|
||||
const WordProperty getWordProperty(const CodePointArrayView codePoints);
|
||||
|
||||
// Method to iterate all words in the dictionary.
|
||||
// The returned token has to be used to get the next word. If token is 0, this method newly
|
||||
|
|
Loading…
Reference in a new issue