Use CodePointArrayView in Dictionary.

Change-Id: I63fa0a8348f6de6ec7a424a8033e936b4af72beb
This commit is contained in:
Keisuke Kuroyanagi 2014-09-17 20:02:15 +09:00
parent 89a074fade
commit 3e75c59133
3 changed files with 51 additions and 50 deletions

View file

@ -259,20 +259,21 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
jintArray word) { jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY; if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word); const jsize codePointCount = env->GetArrayLength(word);
int codePoints[wordLength]; int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, wordLength, codePoints); env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getProbability(codePoints, wordLength); return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
} }
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches( static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
JNIEnv *env, jclass clazz, jlong dict, jintArray word) { JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY; if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word); const jsize codePointCount = env->GetArrayLength(word);
int codePoints[wordLength]; int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, wordLength, codePoints); env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength); return dictionary->getMaxProbabilityOfExactMatches(
CodePointArrayView(codePoints, codePointCount));
} }
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz, static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
@ -285,7 +286,8 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
prevWordCodePointArrays, isBeginningOfSentenceArray); prevWordCodePointArrays, isBeginningOfSentenceArray);
return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength); return dictionary->getNgramProbability(&prevWordsInfo,
CodePointArrayView(wordCodePoints, wordLength));
} }
// Method to iterate all words in the dictionary for makedict. // Method to iterate all words in the dictionary for makedict.
@ -340,7 +342,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
return; return;
} }
} }
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount); const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, codePointCount));
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities); outShortcutProbabilities);
@ -366,7 +369,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
// Use 1 for count to indicate the word has been input. // Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord, const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
} }
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
@ -378,7 +382,7 @@ static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass cla
jsize codePointCount = env->GetArrayLength(word); jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount]; int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints); env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->removeUnigramEntry(codePoints, codePointCount); return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
} }
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict, static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@ -410,10 +414,11 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
} }
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
prevWordCodePointArrays, isBeginningOfSentenceArray); prevWordCodePointArrays, isBeginningOfSentenceArray);
jsize wordLength = env->GetArrayLength(word); jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[wordLength]; int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength); return dictionary->removeNgramEntry(&prevWordsInfo,
CodePointArrayView(wordCodePoints, codePointCount));
} }
// Returns how many language model params are processed. // Returns how many language model params are processed.
@ -484,7 +489,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */, isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
&shortcuts); &shortcuts);
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty); dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty);
if (word0) { if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
const std::vector<int> bigramTargetCodePoints( const std::vector<int> bigramTargetCodePoints(
@ -568,8 +574,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// Add unigrams. // Add unigrams.
do { do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, const WordProperty wordProperty = dictionary->getWordProperty(
wordCodePointCount); CodePointArrayView(wordCodePoints, wordCodePointCount));
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
// Skip beginning-of-sentence unigram. // Skip beginning-of-sentence unigram.
continue; continue;
@ -593,8 +599,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// Add bigrams. // Add bigrams.
do { do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, const WordProperty wordProperty = dictionary->getWordProperty(
wordCodePointCount); CodePointArrayView(wordCodePoints, wordCodePointCount));
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);

View file

@ -102,21 +102,21 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener); mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
} }
int Dictionary::getProbability(const int *word, int length) const { int Dictionary::getProbability(const CodePointArrayView codePoints) const {
return getNgramProbability(nullptr /* prevWordsInfo */, word, length); return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
} }
int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const { int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return DictionaryUtils::getMaxProbabilityOfExactMatches( return DictionaryUtils::getMaxProbabilityOfExactMatches(
mDictionaryStructureWithBufferPolicy.get(), word, length); mDictionaryStructureWithBufferPolicy.get(), codePoints.data(), codePoints.size());
} }
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
int length) const { const CodePointArrayView codePoints) const {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
int wordId = mDictionaryStructureWithBufferPolicy->getWordId( const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
CodePointArrayView(word, length), false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY; if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
if (!prevWordsInfo) { if (!prevWordsInfo) {
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId); return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
@ -128,7 +128,7 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId); return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
} }
bool Dictionary::addUnigramEntry(const int *const word, const int length, bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
const UnigramProperty *const unigramProperty) { const UnigramProperty *const unigramProperty) {
if (unigramProperty->representsBeginningOfSentence() if (unigramProperty->representsBeginningOfSentence()
&& !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy() && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
@ -137,14 +137,12 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length,
return false; return false;
} }
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length), return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
unigramProperty);
} }
bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) { bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry( return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
CodePointArrayView(codePoints, codePointCount));
} }
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
@ -154,10 +152,9 @@ bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
} }
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const int *const word, const int length) { const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
CodePointArrayView(word, length));
} }
bool Dictionary::flush(const char *const filePath) { bool Dictionary::flush(const char *const filePath) {
@ -182,11 +179,9 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
maxResultLength); maxResultLength);
} }
const WordProperty Dictionary::getWordProperty(const int *const codePoints, const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
const int codePointCount) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->getWordProperty( return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
CodePointArrayView(codePoints, codePointCount));
} }
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints, int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,

View file

@ -72,23 +72,23 @@ class Dictionary {
void getPredictions(const PrevWordsInfo *const prevWordsInfo, void getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const; SuggestionResults *const outSuggestionResults) const;
int getProbability(const int *word, int length) const; int getProbability(const CodePointArrayView codePoints) const;
int getMaxProbabilityOfExactMatches(const int *word, int length) const; int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
int getNgramProbability(const PrevWordsInfo *const prevWordsInfo, int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word, int length) const; const CodePointArrayView codePoints) const;
bool addUnigramEntry(const int *const codePoints, const int codePointCount, bool addUnigramEntry(const CodePointArrayView codePoints,
const UnigramProperty *const unigramProperty); const UnigramProperty *const unigramProperty);
bool removeUnigramEntry(const int *const codePoints, const int codePointCount); bool removeUnigramEntry(const CodePointArrayView codePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty); const BigramProperty *const bigramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const int length); const CodePointArrayView codePoints);
bool flush(const char *const filePath); bool flush(const char *const filePath);
@ -99,7 +99,7 @@ class Dictionary {
void getProperty(const char *const query, const int queryLength, char *const outResult, void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength); const int maxResultLength);
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount); const WordProperty getWordProperty(const CodePointArrayView codePoints);
// Method to iterate all words in the dictionary. // Method to iterate all words in the dictionary.
// The returned token has to be used to get the next word. If token is 0, this method newly // The returned token has to be used to get the next word. If token is 0, this method newly