am d4e54af0
: Merge "Make addUnigramWord use UnigramProperty."
* commit 'd4e54af0bad9cdee02756f4973fb48670005e31a': Make addUnigramWord use UnigramProperty.
This commit is contained in:
commit
c5f2b359a9
8 changed files with 104 additions and 65 deletions
|
@ -19,16 +19,19 @@
|
|||
#include "com_android_inputmethod_latin_BinaryDictionary.h"
|
||||
|
||||
#include <cstring> // for memset()
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "jni_common.h"
|
||||
#include "suggest/core/dictionary/dictionary.h"
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
#include "suggest/core/result/suggestion_results.h"
|
||||
#include "suggest/core/suggest_options.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||
#include "utils/char_utils.h"
|
||||
#include "utils/jni_data_utils.h"
|
||||
#include "utils/time_keeper.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -288,22 +291,24 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
|||
}
|
||||
|
||||
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
|
||||
jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability,
|
||||
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
|
||||
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) {
|
||||
return;
|
||||
}
|
||||
jsize wordLength = env->GetArrayLength(word);
|
||||
int codePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
||||
int shortcutTargetCodePoints[shortcutLength];
|
||||
if (shortcutTarget) {
|
||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
||||
jsize codePointCount = env->GetArrayLength(word);
|
||||
int codePoints[codePointCount];
|
||||
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
std::vector<int> shortcutTargetCodePoints;
|
||||
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||
if (!shortcutTargetCodePoints.empty()) {
|
||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||
}
|
||||
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
|
||||
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
|
||||
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
||||
probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
|
||||
}
|
||||
|
||||
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
||||
|
@ -394,15 +399,17 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
|
||||
jintArray shortcutTarget = static_cast<jintArray>(
|
||||
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
|
||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
||||
int shortcutTargetCodePoints[shortcutLength];
|
||||
if (shortcutTarget) {
|
||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
std::vector<int> shortcutTargetCodePoints;
|
||||
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||
if (!shortcutTargetCodePoints.empty()) {
|
||||
jint shortcutProbability =
|
||||
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||
}
|
||||
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
|
||||
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
|
||||
isNotAWord, isBlacklisted, timestamp);
|
||||
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
||||
unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
|
||||
if (word0) {
|
||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
||||
|
|
|
@ -50,15 +50,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
|||
TimeKeeper::setCurrentTime();
|
||||
DicTraverseSession::initSessionInstance(
|
||||
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
|
||||
if (suggestOptions->isGesture()) {
|
||||
mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
||||
languageWeight, outSuggestionResults);
|
||||
} else {
|
||||
mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
||||
languageWeight, outSuggestionResults);
|
||||
}
|
||||
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
|
||||
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
||||
languageWeight, outSuggestionResults);
|
||||
if (DEBUG_DICT) {
|
||||
outSuggestionResults->dumpSuggestions();
|
||||
}
|
||||
|
@ -87,14 +82,10 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
|
|||
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
|
||||
}
|
||||
|
||||
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp) {
|
||||
void Dictionary::addUnigramWord(const int *const word, const int length,
|
||||
const UnigramProperty *const unigramProperty) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability,
|
||||
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
|
||||
isBlacklisted, timestamp);
|
||||
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
|
||||
}
|
||||
|
||||
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
|
|
|
@ -34,7 +34,6 @@ class DicTraverseSession;
|
|||
class ProximityInfo;
|
||||
class SuggestionResults;
|
||||
class SuggestOptions;
|
||||
class WordProperty;
|
||||
|
||||
class Dictionary {
|
||||
public:
|
||||
|
@ -74,10 +73,8 @@ class Dictionary {
|
|||
|
||||
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
||||
|
||||
void addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp);
|
||||
void addUnigramWord(const int *const codePoints, const int codePointCount,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
||||
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
const int length1, const int probability, const int timestamp);
|
||||
|
|
|
@ -29,6 +29,7 @@ class DicNodeVector;
|
|||
class DictionaryBigramsStructurePolicy;
|
||||
class DictionaryHeaderStructurePolicy;
|
||||
class DictionaryShortcutsStructurePolicy;
|
||||
class UnigramProperty;
|
||||
|
||||
/*
|
||||
* This class abstracts the structure of dictionaries.
|
||||
|
@ -69,9 +70,7 @@ class DictionaryStructureWithBufferPolicy {
|
|||
|
||||
// Returns whether the update was successful or not.
|
||||
virtual bool addUnigramWord(const int *const word, const int length,
|
||||
const int probability, const int *const shortcutTargetCodePoints,
|
||||
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
|
||||
const bool isBlacklisted,const int timestamp) = 0;
|
||||
const UnigramProperty *const unigramProperty) = 0;
|
||||
|
||||
// Returns whether the update was successful or not.
|
||||
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
|
|
|
@ -81,10 +81,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
return &mShortcutListPolicy;
|
||||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp) {
|
||||
bool addUnigramWord(const int *const word, const int length,
|
||||
const UnigramProperty *const unigramProperty) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
|
|
@ -154,9 +154,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
|||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp) {
|
||||
const UnigramProperty *const unigramProperty) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
@ -170,20 +168,24 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
|||
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
||||
return false;
|
||||
}
|
||||
if (shortcutLength > MAX_WORD_LENGTH) {
|
||||
AKLOGE("The shortcutTarget is too long to insert to the dictionary, length: %d",
|
||||
shortcutLength);
|
||||
return false;
|
||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||
AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
|
||||
shortcut.getTargetCodePoints()->size());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
bool addedNewUnigram = false;
|
||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
|
||||
isBlacklisted, timestamp, &addedNewUnigram)) {
|
||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
|
||||
unigramProperty->getProbability(), unigramProperty->isNotAWord(),
|
||||
unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
|
||||
&addedNewUnigram)) {
|
||||
if (addedNewUnigram) {
|
||||
mUnigramCount++;
|
||||
}
|
||||
if (shortcutLength > 0) {
|
||||
if (unigramProperty->getShortcuts().size() > 0) {
|
||||
// Add shortcut target.
|
||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
||||
false /* forceLowerCaseSearch */);
|
||||
|
@ -191,11 +193,15 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
|||
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
||||
return false;
|
||||
}
|
||||
if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints,
|
||||
shortcutLength, shortcutProbability)) {
|
||||
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d",
|
||||
wordPos, shortcutLength, shortcutProbability);
|
||||
return false;
|
||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
||||
shortcut.getTargetCodePoints()->data(),
|
||||
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
|
||||
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
|
||||
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
||||
shortcut.getProbability());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -90,10 +90,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
return &mShortcutPolicy;
|
||||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
||||
const int timestamp);
|
||||
bool addUnigramWord(const int *const word, const int length,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
const int length1, const int probability, const int timestamp);
|
||||
|
|
43
native/jni/src/utils/jni_data_utils.h
Normal file
43
native/jni/src/utils/jni_data_utils.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_JNI_DATA_UTILS_H
|
||||
#define LATINIME_JNI_DATA_UTILS_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Holder for a static helper that copies JNI array contents into native containers.
// NOTE(review): DISALLOW_IMPLICIT_CONSTRUCTORS presumably makes this class
// non-instantiable (the macro is defined outside this file) — confirm in defines.h.
class JniDataUtils {
|
||||
public:
|
||||
// Copies the elements of the Java int array `array` into `*outVector`.
//
// env:       JNI environment used to query the array length and read its elements.
// array:     source Java int array; may be null.
// outVector: destination; its previous contents are discarded.
//
// A null `array` leaves `*outVector` empty. Otherwise `*outVector` is resized to the
// array length and filled in a single GetIntArrayRegion call.
// NOTE(review): outVector->data() is an int*; this presumably relies on jint being
// int on the target platform — confirm.
// NOTE(review): no JNI exception check follows GetIntArrayRegion here — confirm
// callers do not need one.
static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
|
||||
// A null Java array is treated as an empty one.
if (!array) {
|
||||
outVector->clear();
|
||||
return;
|
||||
}
|
||||
const jsize arrayLength = env->GetArrayLength(array);
|
||||
// Size the destination first so data() points at arrayLength writable elements.
outVector->resize(arrayLength);
|
||||
env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_JNI_DATA_UTILS_H
|
Loading…
Reference in a new issue