Merge "Make addUnigramWord use UnigramProperty."

This commit is contained in:
Keisuke Kuroyanagi 2014-04-14 11:03:29 +00:00 committed by Android (Google) Code Review
commit d4e54af0ba
8 changed files with 104 additions and 65 deletions

View file

@ -19,16 +19,19 @@
#include "com_android_inputmethod_latin_BinaryDictionary.h" #include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cstring> // for memset() #include <cstring> // for memset()
#include <vector>
#include "defines.h" #include "defines.h"
#include "jni.h" #include "jni.h"
#include "jni_common.h" #include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h" #include "suggest/core/result/suggestion_results.h"
#include "suggest/core/suggest_options.h" #include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
#include "utils/jni_data_utils.h"
#include "utils/time_keeper.h" #include "utils/time_keeper.h"
namespace latinime { namespace latinime {
@ -288,22 +291,24 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
} }
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability, jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) { if (!dictionary) {
return; return;
} }
jsize wordLength = env->GetArrayLength(word); jsize codePointCount = env->GetArrayLength(word);
int codePoints[wordLength]; int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, wordLength, codePoints); env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0; std::vector<UnigramProperty::ShortcutProperty> shortcuts;
int shortcutTargetCodePoints[shortcutLength]; std::vector<int> shortcutTargetCodePoints;
if (shortcutTarget) { JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); if (!shortcutTargetCodePoints.empty()) {
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
} }
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints, const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp); probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
} }
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@ -394,15 +399,17 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId); jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
jintArray shortcutTarget = static_cast<jintArray>( jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId)); env->GetObjectField(languageModelParam, shortcutTargetFieldId));
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0; std::vector<UnigramProperty::ShortcutProperty> shortcuts;
int shortcutTargetCodePoints[shortcutLength]; std::vector<int> shortcutTargetCodePoints;
if (shortcutTarget) { JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); if (!shortcutTargetCodePoints.empty()) {
jint shortcutProbability =
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
} }
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId); const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
shortcutTargetCodePoints, shortcutLength, shortcutProbability, dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
isNotAWord, isBlacklisted, timestamp);
if (word0) { if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length, dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,

View file

@ -50,15 +50,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
DicTraverseSession::initSessionInstance( DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
if (suggestOptions->isGesture()) { const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, ycoordinates, times, pointerIds, inputCodePoints, inputSize,
languageWeight, outSuggestionResults); languageWeight, outSuggestionResults);
} else {
mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
languageWeight, outSuggestionResults);
}
if (DEBUG_DICT) { if (DEBUG_DICT) {
outSuggestionResults->dumpSuggestions(); outSuggestionResults->dumpSuggestions();
} }
@ -87,14 +82,10 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1); return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
} }
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability, void Dictionary::addUnigramWord(const int *const word, const int length,
const int *const shortcutTargetCodePoints, const int shortcutLength, const UnigramProperty *const unigramProperty) {
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability, mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
isBlacklisted, timestamp);
} }
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -34,7 +34,6 @@ class DicTraverseSession;
class ProximityInfo; class ProximityInfo;
class SuggestionResults; class SuggestionResults;
class SuggestOptions; class SuggestOptions;
class WordProperty;
class Dictionary { class Dictionary {
public: public:
@ -74,10 +73,8 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability, void addUnigramWord(const int *const codePoints, const int codePointCount,
const int *const shortcutTargetCodePoints, const int shortcutLength, const UnigramProperty *const unigramProperty);
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp);
void addBigramWords(const int *const word0, const int length0, const int *const word1, void addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability, const int timestamp); const int length1, const int probability, const int timestamp);

View file

@ -29,6 +29,7 @@ class DicNodeVector;
class DictionaryBigramsStructurePolicy; class DictionaryBigramsStructurePolicy;
class DictionaryHeaderStructurePolicy; class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy; class DictionaryShortcutsStructurePolicy;
class UnigramProperty;
/* /*
* This class abstracts the structure of dictionaries. * This class abstracts the structure of dictionaries.
@ -69,9 +70,7 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was success or not. // Returns whether the update was success or not.
virtual bool addUnigramWord(const int *const word, const int length, virtual bool addUnigramWord(const int *const word, const int length,
const int probability, const int *const shortcutTargetCodePoints, const UnigramProperty *const unigramProperty) = 0;
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
const bool isBlacklisted,const int timestamp) = 0;
// Returns whether the update was success or not. // Returns whether the update was success or not.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1, virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -81,10 +81,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy; return &mShortcutListPolicy;
} }
bool addUnigramWord(const int *const word, const int length, const int probability, bool addUnigramWord(const int *const word, const int length,
const int *const shortcutTargetCodePoints, const int shortcutLength, const UnigramProperty *const unigramProperty) {
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
// This method should not be called for non-updatable dictionary. // This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false; return false;

View file

@ -154,9 +154,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
} }
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength, const UnigramProperty *const unigramProperty) {
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false; return false;
@ -170,20 +168,24 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGE("The word is too long to insert to the dictionary, length: %d", length); AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
return false; return false;
} }
if (shortcutLength > MAX_WORD_LENGTH) { for (const auto &shortcut : unigramProperty->getShortcuts()) {
AKLOGE("The shortcutTarget is too long to insert to the dictionary, length: %d", if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
shortcutLength); AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
return false; shortcut.getTargetCodePoints()->size());
return false;
}
} }
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false; bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord, if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
isBlacklisted, timestamp, &addedNewUnigram)) { unigramProperty->getProbability(), unigramProperty->isNotAWord(),
unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
&addedNewUnigram)) {
if (addedNewUnigram) { if (addedNewUnigram) {
mUnigramCount++; mUnigramCount++;
} }
if (shortcutLength > 0) { if (unigramProperty->getShortcuts().size() > 0) {
// Add shortcut target. // Add shortcut target.
const int wordPos = getTerminalPtNodePositionOfWord(word, length, const int wordPos = getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
@ -191,11 +193,15 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGE("Cannot find terminal PtNode position to add shortcut target."); AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
return false; return false;
} }
if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints, for (const auto &shortcut : unigramProperty->getShortcuts()) {
shortcutLength, shortcutProbability)) { if (!mUpdatingHelper.addShortcutTarget(wordPos,
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d", shortcut.getTargetCodePoints()->data(),
wordPos, shortcutLength, shortcutProbability); shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
return false; AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
}
} }
} }
return true; return true;

View file

@ -90,10 +90,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutPolicy; return &mShortcutPolicy;
} }
bool addUnigramWord(const int *const word, const int length, const int probability, bool addUnigramWord(const int *const word, const int length,
const int *const shortcutTargetCodePoints, const int shortcutLength, const UnigramProperty *const unigramProperty);
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp);
bool addBigramWords(const int *const word0, const int length0, const int *const word1, bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability, const int timestamp); const int length1, const int probability, const int timestamp);

View file

@ -0,0 +1,43 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_JNI_DATA_UTILS_H
#define LATINIME_JNI_DATA_UTILS_H
#include <vector>
#include "defines.h"
#include "jni.h"
namespace latinime {
class JniDataUtils {
public:
static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
if (!array) {
outVector->clear();
return;
}
const jsize arrayLength = env->GetArrayLength(array);
outVector->resize(arrayLength);
env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
};
} // namespace latinime
#endif // LATINIME_JNI_DATA_UTILS_H