Merge "Extend jni method to use multiple previous words."
This commit is contained in:
commit
67f863f4cb
4 changed files with 109 additions and 59 deletions
|
@ -174,8 +174,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
private static native int getFormatVersionNative(long dict);
|
||||
private static native int getProbabilityNative(long dict, int[] word);
|
||||
private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
|
||||
private static native int getNgramProbabilityNative(long dict, int[] word0,
|
||||
boolean isBeginningOfSentence, int[] word1);
|
||||
private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
|
||||
boolean[] isBeginningOfSentenceArray, int[] word);
|
||||
private static native void getWordPropertyNative(long dict, int[] word,
|
||||
boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
|
||||
int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets,
|
||||
|
@ -186,7 +186,7 @@ public final class BinaryDictionary extends Dictionary {
|
|||
private static native void getSuggestionsNative(long dict, long proximityInfo,
|
||||
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
|
||||
int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
|
||||
int[] prevWordCodePointArray, boolean isBeginningOfSentence,
|
||||
int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
|
||||
int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
|
||||
int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
|
||||
float[] inOutLanguageWeight);
|
||||
|
@ -194,10 +194,11 @@ public final class BinaryDictionary extends Dictionary {
|
|||
int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
|
||||
boolean isNotAWord, boolean isBlacklisted, int timestamp);
|
||||
private static native boolean removeUnigramEntryNative(long dict, int[] word);
|
||||
private static native boolean addNgramEntryNative(long dict, int[] word0,
|
||||
boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);
|
||||
private static native boolean removeNgramEntryNative(long dict, int[] word0,
|
||||
boolean isBeginningOfSentence, int[] word1);
|
||||
private static native boolean addNgramEntryNative(long dict,
|
||||
int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
|
||||
int[] word, int probability, int timestamp);
|
||||
private static native boolean removeNgramEntryNative(long dict,
|
||||
int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
|
||||
private static native int addMultipleDictionaryEntriesNative(long dict,
|
||||
LanguageModelParam[] languageModelParams, int startIndex);
|
||||
private static native String getPropertyNative(long dict, String query);
|
||||
|
@ -290,8 +291,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
|
||||
inputPointers.getYCoordinates(), inputPointers.getTimes(),
|
||||
inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
|
||||
session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays[0],
|
||||
session.mIsBeginningOfSentenceArray[0], session.mOutputSuggestionCount,
|
||||
session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
|
||||
session.mIsBeginningOfSentenceArray, session.mOutputSuggestionCount,
|
||||
session.mOutputCodePoints, session.mOutputScores, session.mSpaceIndices,
|
||||
session.mOutputTypes, session.mOutputAutoCommitFirstWordConfidence,
|
||||
session.mInputOutputLanguageWeight);
|
||||
|
@ -359,8 +360,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
final int[] wordCodePoints = StringUtils.toCodePointArray(word);
|
||||
return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays[0],
|
||||
isBeginningOfSentenceArray[0], wordCodePoints);
|
||||
return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
|
||||
isBeginningOfSentenceArray, wordCodePoints);
|
||||
}
|
||||
|
||||
public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
|
||||
|
@ -456,8 +457,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
final int[] wordCodePoints = StringUtils.toCodePointArray(word);
|
||||
if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays[0],
|
||||
isBeginningOfSentenceArray[0], wordCodePoints, probability, timestamp)) {
|
||||
if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
|
||||
isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
|
||||
return false;
|
||||
}
|
||||
mHasUpdated = true;
|
||||
|
@ -474,8 +475,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
final int[] wordCodePoints = StringUtils.toCodePointArray(word);
|
||||
if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays[0],
|
||||
isBeginningOfSentenceArray[0], wordCodePoints)) {
|
||||
if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays,
|
||||
isBeginningOfSentenceArray, wordCodePoints)) {
|
||||
return false;
|
||||
}
|
||||
mHasUpdated = true;
|
||||
|
|
|
@ -178,7 +178,7 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
|
|||
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
|
||||
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
|
||||
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
|
||||
jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
|
||||
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
|
||||
jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
|
||||
jintArray outSpaceIndicesArray, jintArray outTypesArray,
|
||||
jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
|
||||
|
@ -201,20 +201,11 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
|
|||
int pointerIds[inputSize];
|
||||
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
|
||||
int inputCodePoints[inputCodePointsLength];
|
||||
const jsize prevWordCodePointsLength =
|
||||
prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0;
|
||||
int prevWordCodePointsInternal[prevWordCodePointsLength];
|
||||
int *prevWordCodePoints = nullptr;
|
||||
env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
|
||||
env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
|
||||
env->GetIntArrayRegion(timesArray, 0, inputSize, times);
|
||||
env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
|
||||
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
|
||||
if (prevWordCodePointsForBigrams) {
|
||||
env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength,
|
||||
prevWordCodePointsInternal);
|
||||
prevWordCodePoints = prevWordCodePointsInternal;
|
||||
}
|
||||
|
||||
const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
|
||||
int options[numberOfOptions];
|
||||
|
@ -248,8 +239,8 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
|
|||
float languageWeight;
|
||||
env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
|
||||
SuggestionResults suggestionResults(MAX_RESULTS);
|
||||
const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength,
|
||||
isBeginningOfSentence);
|
||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
if (givenSuggestOptions.isGesture() || inputSize > 0) {
|
||||
// TODO: Use SuggestionResults to return suggestions.
|
||||
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
||||
|
@ -391,41 +382,38 @@ static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass cla
|
|||
}
|
||||
|
||||
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||
jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
|
||||
jint timestamp) {
|
||||
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
|
||||
jintArray word, jint probability, jint timestamp) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) {
|
||||
return false;
|
||||
}
|
||||
jsize word0Length = env->GetArrayLength(word0);
|
||||
int word0CodePoints[word0Length];
|
||||
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
|
||||
jsize word1Length = env->GetArrayLength(word1);
|
||||
int word1CodePoints[word1Length];
|
||||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
jsize wordLength = env->GetArrayLength(word);
|
||||
int wordCodePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
const std::vector<int> bigramTargetCodePoints(
|
||||
word1CodePoints, word1CodePoints + word1Length);
|
||||
wordCodePoints, wordCodePoints + wordLength);
|
||||
// Use 1 for count to indicate the bigram has inputted.
|
||||
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
|
||||
timestamp, 0 /* level */, 1 /* count */);
|
||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
|
||||
return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
|
||||
}
|
||||
|
||||
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||
jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
|
||||
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
|
||||
jintArray word) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) {
|
||||
return false;
|
||||
}
|
||||
jsize word0Length = env->GetArrayLength(word0);
|
||||
int word0CodePoints[word0Length];
|
||||
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
|
||||
jsize word1Length = env->GetArrayLength(word1);
|
||||
int word1CodePoints[word1Length];
|
||||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
|
||||
return dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
|
||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
||||
prevWordCodePointArrays, isBeginningOfSentenceArray);
|
||||
jsize wordLength = env->GetArrayLength(word);
|
||||
int wordCodePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
|
||||
}
|
||||
|
||||
// Returns how many language model params are processed.
|
||||
|
@ -672,7 +660,7 @@ static const JNINativeMethod sMethods[] = {
|
|||
},
|
||||
{
|
||||
const_cast<char *>("getSuggestionsNative"),
|
||||
const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
|
||||
const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
|
||||
},
|
||||
{
|
||||
|
@ -687,7 +675,7 @@ static const JNINativeMethod sMethods[] = {
|
|||
},
|
||||
{
|
||||
const_cast<char *>("getNgramProbabilityNative"),
|
||||
const_cast<char *>("(J[IZ[I)I"),
|
||||
const_cast<char *>("(J[[I[Z[I)I"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
|
||||
},
|
||||
{
|
||||
|
@ -713,12 +701,12 @@ static const JNINativeMethod sMethods[] = {
|
|||
},
|
||||
{
|
||||
const_cast<char *>("addNgramEntryNative"),
|
||||
const_cast<char *>("(J[IZ[III)Z"),
|
||||
const_cast<char *>("(J[[I[Z[III)Z"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
|
||||
},
|
||||
{
|
||||
const_cast<char *>("removeNgramEntryNative"),
|
||||
const_cast<char *>("(J[IZ[I)Z"),
|
||||
const_cast<char *>("(J[[I[Z[I)Z"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
|
||||
},
|
||||
{
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
namespace latinime {
|
||||
|
||||
// TODO: Support n-gram.
|
||||
// This class does not take ownership of any code point buffers.
|
||||
class PrevWordsInfo {
|
||||
public:
|
||||
// No prev word information.
|
||||
|
@ -33,21 +32,52 @@ class PrevWordsInfo {
|
|||
clear();
|
||||
}
|
||||
|
||||
PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
|
||||
memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
|
||||
sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
|
||||
mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Construct from previous words.
|
||||
PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
|
||||
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
|
||||
const size_t prevWordCount) {
|
||||
clear();
|
||||
for (size_t i = 0; i < std::min(NELEMS(mPrevWordCodePoints), prevWordCount); ++i) {
|
||||
if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
|
||||
continue;
|
||||
}
|
||||
memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
|
||||
sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
|
||||
mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
|
||||
mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Construct from a previous word.
|
||||
PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
|
||||
const bool isBeginningOfSentence) {
|
||||
clear();
|
||||
mPrevWordCodePoints[0] = prevWordCodePoints;
|
||||
if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
|
||||
return;
|
||||
}
|
||||
memmove(mPrevWordCodePoints[0], prevWordCodePoints,
|
||||
sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
|
||||
mPrevWordCodePointCount[0] = prevWordCodePointCount;
|
||||
mIsBeginningOfSentence[0] = isBeginningOfSentence;
|
||||
}
|
||||
|
||||
bool isValid() const {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
if (mPrevWordCodePointCount[i] > MAX_WORD_LENGTH) {
|
||||
return false;
|
||||
}
|
||||
if (mPrevWordCodePointCount[0] > 0) {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
if (mIsBeginningOfSentence[0]) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void getPrevWordsTerminalPtNodePos(
|
||||
|
@ -168,13 +198,12 @@ class PrevWordsInfo {
|
|||
|
||||
void clear() {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
mPrevWordCodePoints[i] = nullptr;
|
||||
mPrevWordCodePointCount[i] = 0;
|
||||
mIsBeginningOfSentence[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
|
||||
int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
};
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "suggest/core/session/prev_words_info.h"
|
||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
@ -95,6 +96,37 @@ class JniDataUtils {
|
|||
}
|
||||
}
|
||||
|
||||
static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
|
||||
jbooleanArray isBeginningOfSentenceArray) {
|
||||
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
|
||||
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays);
|
||||
for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) {
|
||||
prevWordCodePointCount[i] = 0;
|
||||
isBeginningOfSentence[i] = false;
|
||||
if (prevWordsCount <= static_cast<int>(i)) {
|
||||
continue;
|
||||
}
|
||||
jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
|
||||
if (!prevWord) {
|
||||
continue;
|
||||
}
|
||||
jsize prevWordLength = env->GetArrayLength(prevWord);
|
||||
if (prevWordLength > MAX_WORD_LENGTH) {
|
||||
continue;
|
||||
}
|
||||
env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
|
||||
prevWordCodePointCount[i] = prevWordLength;
|
||||
jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
|
||||
env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
|
||||
&isBeginningOfSentenceBoolean);
|
||||
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
|
||||
}
|
||||
return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
|
||||
MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||
}
|
||||
|
||||
static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
|
||||
const jboolean value) {
|
||||
env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
|
||||
|
|
Loading…
Reference in a new issue