Merge "Make addUnigramWord use UnigramProperty."

This commit is contained in:
Keisuke Kuroyanagi 2014-04-14 11:03:29 +00:00 committed by Android (Google) Code Review
commit d4e54af0ba
8 changed files with 104 additions and 65 deletions

View file

@ -19,16 +19,19 @@
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cstring> // for memset()
#include <vector>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
#include "utils/jni_data_utils.h"
#include "utils/time_keeper.h"
namespace latinime {
@ -288,22 +291,24 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
}
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
}
jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
int shortcutTargetCodePoints[shortcutLength];
if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@ -394,15 +399,17 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
int shortcutTargetCodePoints[shortcutLength];
if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
jint shortcutProbability =
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
isNotAWord, isBlacklisted, timestamp);
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,

View file

@ -50,15 +50,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
TimeKeeper::setCurrentTime();
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
if (suggestOptions->isGesture()) {
mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
languageWeight, outSuggestionResults);
} else {
mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
languageWeight, outSuggestionResults);
}
if (DEBUG_DICT) {
outSuggestionResults->dumpSuggestions();
}
@ -87,14 +82,10 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
void Dictionary::addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
TimeKeeper::setCurrentTime();
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability,
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
isBlacklisted, timestamp);
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -34,7 +34,6 @@ class DicTraverseSession;
class ProximityInfo;
class SuggestionResults;
class SuggestOptions;
class WordProperty;
class Dictionary {
public:
@ -74,10 +73,8 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp);
void addUnigramWord(const int *const codePoints, const int codePointCount,
const UnigramProperty *const unigramProperty);
void addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability, const int timestamp);

View file

@ -29,6 +29,7 @@ class DicNodeVector;
class DictionaryBigramsStructurePolicy;
class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
class UnigramProperty;
/*
* This class abstracts the structure of dictionaries.
@ -69,9 +70,7 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was successful or not.
virtual bool addUnigramWord(const int *const word, const int length,
const int probability, const int *const shortcutTargetCodePoints,
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
const bool isBlacklisted,const int timestamp) = 0;
const UnigramProperty *const unigramProperty) = 0;
// Returns whether the update was successful or not.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -81,10 +81,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy;
}
bool addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
bool addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;

View file

@ -154,9 +154,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
}
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
const UnigramProperty *const unigramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
@ -170,20 +168,24 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
return false;
}
if (shortcutLength > MAX_WORD_LENGTH) {
AKLOGE("The shortcutTarget is too long to insert to the dictionary, length: %d",
shortcutLength);
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
shortcut.getTargetCodePoints()->size());
return false;
}
}
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
isBlacklisted, timestamp, &addedNewUnigram)) {
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
unigramProperty->getProbability(), unigramProperty->isNotAWord(),
unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
&addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
}
if (shortcutLength > 0) {
if (unigramProperty->getShortcuts().size() > 0) {
// Add shortcut target.
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
@ -191,13 +193,17 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
return false;
}
if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints,
shortcutLength, shortcutProbability)) {
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d",
wordPos, shortcutLength, shortcutProbability);
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
}
}
}
return true;
} else {
return false;

View file

@ -90,10 +90,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutPolicy;
}
bool addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp);
bool addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability, const int timestamp);

View file

@ -0,0 +1,43 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_JNI_DATA_UTILS_H
#define LATINIME_JNI_DATA_UTILS_H
#include <vector>
#include "defines.h"
#include "jni.h"
namespace latinime {
class JniDataUtils {
public:
static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
if (!array) {
outVector->clear();
return;
}
const jsize arrayLength = env->GetArrayLength(array);
outVector->resize(arrayLength);
env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
};
} // namespace latinime
#endif // LATINIME_JNI_DATA_UTILS_H