Merge "Make addUnigramWord use UnigramProperty."
This commit is contained in:
commit
d4e54af0ba
8 changed files with 104 additions and 65 deletions
|
@ -19,16 +19,19 @@
|
||||||
#include "com_android_inputmethod_latin_BinaryDictionary.h"
|
#include "com_android_inputmethod_latin_BinaryDictionary.h"
|
||||||
|
|
||||||
#include <cstring> // for memset()
|
#include <cstring> // for memset()
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "jni_common.h"
|
#include "jni_common.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
#include "suggest/core/result/suggestion_results.h"
|
#include "suggest/core/result/suggestion_results.h"
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
#include "utils/char_utils.h"
|
#include "utils/char_utils.h"
|
||||||
|
#include "utils/jni_data_utils.h"
|
||||||
#include "utils/time_keeper.h"
|
#include "utils/time_keeper.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -288,22 +291,24 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
|
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability,
|
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
|
||||||
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
|
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) {
|
if (!dictionary) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
jsize wordLength = env->GetArrayLength(word);
|
jsize codePointCount = env->GetArrayLength(word);
|
||||||
int codePoints[wordLength];
|
int codePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
int shortcutTargetCodePoints[shortcutLength];
|
std::vector<int> shortcutTargetCodePoints;
|
||||||
if (shortcutTarget) {
|
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
if (!shortcutTargetCodePoints.empty()) {
|
||||||
|
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||||
}
|
}
|
||||||
dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
|
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
||||||
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
|
probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
|
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
|
@ -394,15 +399,17 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
|
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
|
||||||
jintArray shortcutTarget = static_cast<jintArray>(
|
jintArray shortcutTarget = static_cast<jintArray>(
|
||||||
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
|
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
|
||||||
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
|
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
int shortcutTargetCodePoints[shortcutLength];
|
std::vector<int> shortcutTargetCodePoints;
|
||||||
if (shortcutTarget) {
|
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||||
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
|
if (!shortcutTargetCodePoints.empty()) {
|
||||||
|
jint shortcutProbability =
|
||||||
|
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||||
|
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||||
}
|
}
|
||||||
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
||||||
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
|
unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
|
dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
|
||||||
isNotAWord, isBlacklisted, timestamp);
|
|
||||||
if (word0) {
|
if (word0) {
|
||||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||||
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
|
||||||
|
|
|
@ -50,15 +50,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
DicTraverseSession::initSessionInstance(
|
DicTraverseSession::initSessionInstance(
|
||||||
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
|
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
|
||||||
if (suggestOptions->isGesture()) {
|
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
|
||||||
mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
||||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
||||||
languageWeight, outSuggestionResults);
|
languageWeight, outSuggestionResults);
|
||||||
} else {
|
|
||||||
mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
|
||||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
|
||||||
languageWeight, outSuggestionResults);
|
|
||||||
}
|
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
outSuggestionResults->dumpSuggestions();
|
outSuggestionResults->dumpSuggestions();
|
||||||
}
|
}
|
||||||
|
@ -87,14 +82,10 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
|
||||||
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
|
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
|
void Dictionary::addUnigramWord(const int *const word, const int length,
|
||||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
const UnigramProperty *const unigramProperty) {
|
||||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
|
||||||
const int timestamp) {
|
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability,
|
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
|
||||||
shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
|
|
||||||
isBlacklisted, timestamp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
|
|
|
@ -34,7 +34,6 @@ class DicTraverseSession;
|
||||||
class ProximityInfo;
|
class ProximityInfo;
|
||||||
class SuggestionResults;
|
class SuggestionResults;
|
||||||
class SuggestOptions;
|
class SuggestOptions;
|
||||||
class WordProperty;
|
|
||||||
|
|
||||||
class Dictionary {
|
class Dictionary {
|
||||||
public:
|
public:
|
||||||
|
@ -74,10 +73,8 @@ class Dictionary {
|
||||||
|
|
||||||
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
||||||
|
|
||||||
void addUnigramWord(const int *const word, const int length, const int probability,
|
void addUnigramWord(const int *const codePoints, const int codePointCount,
|
||||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
const UnigramProperty *const unigramProperty);
|
||||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
|
||||||
const int timestamp);
|
|
||||||
|
|
||||||
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
void addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
const int length1, const int probability, const int timestamp);
|
const int length1, const int probability, const int timestamp);
|
||||||
|
|
|
@ -29,6 +29,7 @@ class DicNodeVector;
|
||||||
class DictionaryBigramsStructurePolicy;
|
class DictionaryBigramsStructurePolicy;
|
||||||
class DictionaryHeaderStructurePolicy;
|
class DictionaryHeaderStructurePolicy;
|
||||||
class DictionaryShortcutsStructurePolicy;
|
class DictionaryShortcutsStructurePolicy;
|
||||||
|
class UnigramProperty;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This class abstracts the structure of dictionaries.
|
* This class abstracts the structure of dictionaries.
|
||||||
|
@ -69,9 +70,7 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool addUnigramWord(const int *const word, const int length,
|
virtual bool addUnigramWord(const int *const word, const int length,
|
||||||
const int probability, const int *const shortcutTargetCodePoints,
|
const UnigramProperty *const unigramProperty) = 0;
|
||||||
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
|
|
||||||
const bool isBlacklisted,const int timestamp) = 0;
|
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
|
|
|
@ -81,10 +81,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return &mShortcutListPolicy;
|
return &mShortcutListPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
bool addUnigramWord(const int *const word, const int length,
|
||||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
const UnigramProperty *const unigramProperty) {
|
||||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
|
||||||
const int timestamp) {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -154,9 +154,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||||
const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
|
const UnigramProperty *const unigramProperty) {
|
||||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
|
||||||
const int timestamp) {
|
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -170,20 +168,24 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (shortcutLength > MAX_WORD_LENGTH) {
|
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||||
AKLOGE("The shortcutTarget is too long to insert to the dictionary, length: %d",
|
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||||
shortcutLength);
|
AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
|
||||||
return false;
|
shortcut.getTargetCodePoints()->size());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
|
||||||
isBlacklisted, timestamp, &addedNewUnigram)) {
|
unigramProperty->getProbability(), unigramProperty->isNotAWord(),
|
||||||
|
unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
|
||||||
|
&addedNewUnigram)) {
|
||||||
if (addedNewUnigram) {
|
if (addedNewUnigram) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
if (shortcutLength > 0) {
|
if (unigramProperty->getShortcuts().size() > 0) {
|
||||||
// Add shortcut target.
|
// Add shortcut target.
|
||||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
|
@ -191,11 +193,15 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints,
|
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||||
shortcutLength, shortcutProbability)) {
|
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
||||||
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d",
|
shortcut.getTargetCodePoints()->data(),
|
||||||
wordPos, shortcutLength, shortcutProbability);
|
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
|
||||||
return false;
|
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
|
||||||
|
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
||||||
|
shortcut.getProbability());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -90,10 +90,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return &mShortcutPolicy;
|
return &mShortcutPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
bool addUnigramWord(const int *const word, const int length,
|
||||||
const int *const shortcutTargetCodePoints, const int shortcutLength,
|
const UnigramProperty *const unigramProperty);
|
||||||
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
|
|
||||||
const int timestamp);
|
|
||||||
|
|
||||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
const int length1, const int probability, const int timestamp);
|
const int length1, const int probability, const int timestamp);
|
||||||
|
|
43
native/jni/src/utils/jni_data_utils.h
Normal file
43
native/jni/src/utils/jni_data_utils.h
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_JNI_DATA_UTILS_H
|
||||||
|
#define LATINIME_JNI_DATA_UTILS_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "jni.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class JniDataUtils {
|
||||||
|
public:
|
||||||
|
static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
|
||||||
|
if (!array) {
|
||||||
|
outVector->clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const jsize arrayLength = env->GetArrayLength(array);
|
||||||
|
outVector->resize(arrayLength);
|
||||||
|
env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_JNI_DATA_UTILS_H
|
Loading…
Reference in a new issue