Use CodePointArrayView in Dictionary.

Change-Id: I63fa0a8348f6de6ec7a424a8033e936b4af72beb
This commit is contained in:
Keisuke Kuroyanagi 2014-09-17 20:02:15 +09:00
parent 89a074fade
commit 3e75c59133
3 changed files with 51 additions and 50 deletions

View file

@ -259,20 +259,21 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
return dictionary->getProbability(codePoints, wordLength);
const jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
}
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
const jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getMaxProbabilityOfExactMatches(
CodePointArrayView(codePoints, codePointCount));
}
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
@ -285,7 +286,8 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
prevWordCodePointArrays, isBeginningOfSentenceArray);
return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength);
return dictionary->getNgramProbability(&prevWordsInfo,
CodePointArrayView(wordCodePoints, wordLength));
}
// Method to iterate all words in the dictionary for makedict.
@ -340,7 +342,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
return;
}
}
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, codePointCount));
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
@ -366,7 +369,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
// Use 1 for count to indicate the word has been inputted.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
}
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
@ -378,7 +382,7 @@ static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass cla
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->removeUnigramEntry(codePoints, codePointCount);
return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
}
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@ -410,10 +414,11 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
}
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
prevWordCodePointArrays, isBeginningOfSentenceArray);
jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
return dictionary->removeNgramEntry(&prevWordsInfo,
CodePointArrayView(wordCodePoints, codePointCount));
}
// Returns how many language model params are processed.
@ -484,7 +489,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
&shortcuts);
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
const std::vector<int> bigramTargetCodePoints(
@ -568,8 +574,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// Add unigrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, wordCodePointCount));
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
// Skip beginning-of-sentence unigram.
continue;
@ -593,8 +599,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// Add bigrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, wordCodePointCount));
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);

View file

@ -102,21 +102,21 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
}
int Dictionary::getProbability(const int *word, int length) const {
return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
}
int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
TimeKeeper::setCurrentTime();
return DictionaryUtils::getMaxProbabilityOfExactMatches(
mDictionaryStructureWithBufferPolicy.get(), word, length);
mDictionaryStructureWithBufferPolicy.get(), codePoints.data(), codePoints.size());
}
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
int length) const {
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView codePoints) const {
TimeKeeper::setCurrentTime();
int wordId = mDictionaryStructureWithBufferPolicy->getWordId(
CodePointArrayView(word, length), false /* forceLowerCaseSearch */);
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
if (!prevWordsInfo) {
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
@ -128,7 +128,7 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
}
bool Dictionary::addUnigramEntry(const int *const word, const int length,
bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
const UnigramProperty *const unigramProperty) {
if (unigramProperty->representsBeginningOfSentence()
&& !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
@ -137,14 +137,12 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length,
return false;
}
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length),
unigramProperty);
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
}
bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(
CodePointArrayView(codePoints, codePointCount));
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
}
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
@ -154,10 +152,9 @@ bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
}
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const int *const word, const int length) {
const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo,
CodePointArrayView(word, length));
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
}
bool Dictionary::flush(const char *const filePath) {
@ -182,11 +179,9 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
maxResultLength);
}
const WordProperty Dictionary::getWordProperty(const int *const codePoints,
const int codePointCount) {
const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->getWordProperty(
CodePointArrayView(codePoints, codePointCount));
return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
}
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,

View file

@ -72,23 +72,23 @@ class Dictionary {
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const;
int getProbability(const int *word, int length) const;
int getProbability(const CodePointArrayView codePoints) const;
int getMaxProbabilityOfExactMatches(const int *word, int length) const;
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word, int length) const;
const CodePointArrayView codePoints) const;
bool addUnigramEntry(const int *const codePoints, const int codePointCount,
bool addUnigramEntry(const CodePointArrayView codePoints,
const UnigramProperty *const unigramProperty);
bool removeUnigramEntry(const int *const codePoints, const int codePointCount);
bool removeUnigramEntry(const CodePointArrayView codePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
const int length);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView codePoints);
bool flush(const char *const filePath);
@ -99,7 +99,7 @@ class Dictionary {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
const WordProperty getWordProperty(const CodePointArrayView codePoints);
// Method to iterate all words in the dictionary.
// The returned token has to be used to get the next word. If token is 0, this method newly