Add a jni method to fetch unigram information.
Bug: 11956652
Change-Id: I59f1823bbc0146f6d2f34cbed8166bb6de197208
main
parent
bc33efa4bd
commit
9fb28f78f7
|
@ -26,6 +26,7 @@ import com.android.inputmethod.latin.settings.NativeSuggestOptions;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
import com.android.inputmethod.latin.utils.JniUtils;
|
import com.android.inputmethod.latin.utils.JniUtils;
|
||||||
import com.android.inputmethod.latin.utils.StringUtils;
|
import com.android.inputmethod.latin.utils.StringUtils;
|
||||||
|
import com.android.inputmethod.latin.utils.UnigramProperty;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -59,6 +60,19 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
|
|
||||||
public static final int NOT_A_VALID_TIMESTAMP = -1;
|
public static final int NOT_A_VALID_TIMESTAMP = -1;
|
||||||
|
|
||||||
|
// Format to get unigram flags from native side via getUnigramPropertyNative().
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT = 4;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX = 1;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX = 2;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
|
||||||
|
|
||||||
|
// Format to get unigram historical info from native side via getUnigramPropertyNative().
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX = 0;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX = 1;
|
||||||
|
private static final int FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX = 2;
|
||||||
|
|
||||||
private long mNativeDict;
|
private long mNativeDict;
|
||||||
private final Locale mLocale;
|
private final Locale mLocale;
|
||||||
private final long mDictSize;
|
private final long mDictSize;
|
||||||
|
@ -129,6 +143,10 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private static native int getFormatVersionNative(long dict);
|
private static native int getFormatVersionNative(long dict);
|
||||||
private static native int getProbabilityNative(long dict, int[] word);
|
private static native int getProbabilityNative(long dict, int[] word);
|
||||||
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
|
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
|
||||||
|
private static native void getUnigramPropertyNative(long dict, int[] word,
|
||||||
|
int[] outCodePoints, boolean[] outFlags, int[] outProbability,
|
||||||
|
int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
|
||||||
|
ArrayList<Integer> outShortcutProbabilities);
|
||||||
private static native int getSuggestionsNative(long dict, long proximityInfo,
|
private static native int getSuggestionsNative(long dict, long proximityInfo,
|
||||||
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
|
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
|
||||||
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
|
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
|
||||||
|
@ -290,6 +308,32 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
|
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public UnigramProperty getUnigramProperty(final String word) {
|
||||||
|
if (TextUtils.isEmpty(word)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final int[] codePoints = StringUtils.toCodePointArray(word);
|
||||||
|
final int[] outCodePoints = new int[MAX_WORD_LENGTH];
|
||||||
|
final boolean[] outFlags = new boolean[FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT];
|
||||||
|
final int[] outProbability = new int[1];
|
||||||
|
final int[] outHistoricalInfo =
|
||||||
|
new int[FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
|
||||||
|
final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
|
||||||
|
final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
|
||||||
|
getUnigramPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
|
||||||
|
outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
|
||||||
|
return new UnigramProperty(codePoints,
|
||||||
|
outFlags[FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX],
|
||||||
|
outFlags[FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX],
|
||||||
|
outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX],
|
||||||
|
outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
|
||||||
|
outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX],
|
||||||
|
outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX],
|
||||||
|
outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX],
|
||||||
|
outShortcutTargets, outShortcutProbabilities);
|
||||||
|
}
|
||||||
|
|
||||||
// Add a unigram entry to binary dictionary with unigram attributes in native code.
|
// Add a unigram entry to binary dictionary with unigram attributes in native code.
|
||||||
public void addUnigramWord(final String word, final int probability,
|
public void addUnigramWord(final String word, final int probability,
|
||||||
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
|
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.utils;
|
||||||
|
|
||||||
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
// This has information that belong to a unigram. This class has some detailed attributes such as
|
||||||
|
// historical information but they have to be checked only for testing purpose.
|
||||||
|
@UsedForTesting
|
||||||
|
public class UnigramProperty {
|
||||||
|
public final String mCodePoints;
|
||||||
|
public final boolean mIsNotAWord;
|
||||||
|
public final boolean mIsBlacklisted;
|
||||||
|
public final boolean mHasBigrams;
|
||||||
|
public final boolean mHasShortcuts;
|
||||||
|
public final int mProbability;
|
||||||
|
// mTimestamp, mLevel and mCount are historical info. These values are depend on the
|
||||||
|
// implementation in native code; thus, we must not use them and have any assumptions about
|
||||||
|
// them except for tests.
|
||||||
|
public final int mTimestamp;
|
||||||
|
public final int mLevel;
|
||||||
|
public final int mCount;
|
||||||
|
public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
|
||||||
|
|
||||||
|
private static int getCodePointCount(final int[] codePoints) {
|
||||||
|
for (int i = 0; i < codePoints.length; i++) {
|
||||||
|
if (codePoints[i] == 0) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return codePoints.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This represents invalid unigram when the probability is BinaryDictionary.NOT_A_PROBABILITY.
|
||||||
|
public UnigramProperty(final int[] codePoints, final boolean isNotAWord,
|
||||||
|
final boolean isBlacklisted, final boolean hasBigram,
|
||||||
|
final boolean hasShortcuts, final int probability, final int timestamp,
|
||||||
|
final int level, final int count, final ArrayList<int[]> shortcutTargets,
|
||||||
|
final ArrayList<Integer> shortcutProbabilities) {
|
||||||
|
mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
|
||||||
|
mIsNotAWord = isNotAWord;
|
||||||
|
mIsBlacklisted = isBlacklisted;
|
||||||
|
mHasBigrams = hasBigram;
|
||||||
|
mHasShortcuts = hasShortcuts;
|
||||||
|
mProbability = probability;
|
||||||
|
mTimestamp = timestamp;
|
||||||
|
mLevel = level;
|
||||||
|
mCount = count;
|
||||||
|
final int shortcutTargetCount = shortcutTargets.size();
|
||||||
|
for (int i = 0; i < shortcutTargetCount; i++) {
|
||||||
|
final int[] shortcutTargetCodePointArray = shortcutTargets.get(i);
|
||||||
|
final String shortcutTargetString = new String(shortcutTargetCodePointArray,
|
||||||
|
0 /* offset */, getCodePointCount(shortcutTargetCodePointArray));
|
||||||
|
mShortcutTargets.add(
|
||||||
|
new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public boolean isValid() {
|
||||||
|
return mProbability != BinaryDictionary.NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
}
|
|
@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dictionary.cpp \
|
dictionary.cpp \
|
||||||
digraph_utils.cpp \
|
digraph_utils.cpp \
|
||||||
error_type_utils.cpp \
|
error_type_utils.cpp \
|
||||||
multi_bigram_map.cpp) \
|
multi_bigram_map.cpp \
|
||||||
|
unigram_property.cpp) \
|
||||||
$(addprefix suggest/core/layout/, \
|
$(addprefix suggest/core/layout/, \
|
||||||
additional_proximity_chars.cpp \
|
additional_proximity_chars.cpp \
|
||||||
proximity_info.cpp \
|
proximity_info.cpp \
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "jni_common.h"
|
#include "jni_common.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
|
#include "suggest/core/dictionary/unigram_property.h"
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
|
@ -266,6 +267,21 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
|
||||||
word1Length);
|
word1Length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Native implementation of BinaryDictionary.getUnigramPropertyNative(): looks up the unigram
// given by the code point array "word" and writes its properties into the out parameters.
// Does nothing when the dictionary handle is null.
static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz,
        jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
        jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
        jobject outShortcutProbabilities) {
    Dictionary *const nativeDictionary = reinterpret_cast<Dictionary *>(dict);
    if (nativeDictionary == 0) {
        return;
    }
    // Copy the queried word out of the Java array.
    const jsize codePointCount = env->GetArrayLength(word);
    int codePoints[codePointCount];
    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
    // Fetch the property and hand it straight to the Java-side output parameters.
    nativeDictionary->getUnigramProperty(codePoints, codePointCount).outputProperties(
            env, outCodePoints, outFlags, outProbability, outHistoricalInfo,
            outShortcutTargets, outShortcutProbabilities);
}
|
||||||
|
|
||||||
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
|
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
|
||||||
jintArray before, jintArray after, jint score) {
|
jintArray before, jintArray after, jint score) {
|
||||||
jsize beforeLength = env->GetArrayLength(before);
|
jsize beforeLength = env->GetArrayLength(before);
|
||||||
|
@ -341,7 +357,6 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
|
||||||
word1Length);
|
word1Length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Returns how many language model params are processed.
|
// Returns how many language model params are processed.
|
||||||
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
|
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
|
||||||
jlong dict, jobjectArray languageModelParams, jint startIndex) {
|
jlong dict, jobjectArray languageModelParams, jint startIndex) {
|
||||||
|
@ -506,6 +521,11 @@ static const JNINativeMethod sMethods[] = {
|
||||||
const_cast<char *>("(J[I[I)I"),
|
const_cast<char *>("(J[I[I)I"),
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
const_cast<char *>("getUnigramPropertyNative"),
|
||||||
|
const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
|
||||||
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty)
|
||||||
|
},
|
||||||
{
|
{
|
||||||
const_cast<char *>("calcNormalizedScoreNative"),
|
const_cast<char *>("calcNormalizedScoreNative"),
|
||||||
const_cast<char *>("([I[II)F"),
|
const_cast<char *>("([I[II)F"),
|
||||||
|
|
|
@ -143,6 +143,13 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
|
||||||
maxResultLength);
|
maxResultLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the unigram property of the given word by delegating to the dictionary structure
// policy. TimeKeeper's current time is refreshed before the lookup.
const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
        const int codePointCount) {
    TimeKeeper::setCurrentTime();
    const DictionaryStructureWithBufferPolicy *const structurePolicy =
            mDictionaryStructureWithBufferPolicy.get();
    return structurePolicy->getUnigramProperty(codePoints, codePointCount);
}
|
||||||
|
|
||||||
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
|
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
|
||||||
int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
|
int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
|
||||||
int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
|
int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "suggest/core/dictionary/bigram_dictionary.h"
|
#include "suggest/core/dictionary/bigram_dictionary.h"
|
||||||
|
#include "suggest/core/dictionary/unigram_property.h"
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/core/suggest_interface.h"
|
#include "suggest/core/suggest_interface.h"
|
||||||
|
@ -33,6 +34,7 @@ class DictionaryStructureWithBufferPolicy;
|
||||||
class DicTraverseSession;
|
class DicTraverseSession;
|
||||||
class ProximityInfo;
|
class ProximityInfo;
|
||||||
class SuggestOptions;
|
class SuggestOptions;
|
||||||
|
class UnigramProperty;
|
||||||
|
|
||||||
class Dictionary {
|
class Dictionary {
|
||||||
public:
|
public:
|
||||||
|
@ -92,6 +94,8 @@ class Dictionary {
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength);
|
const int maxResultLength);
|
||||||
|
|
||||||
|
// Returns the unigram property of the word given as codePointCount code points in
// codePoints. The lookup is delegated to the dictionary structure policy.
const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
|
||||||
|
|
||||||
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
|
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
|
||||||
return mDictionaryStructureWithBufferPolicy.get();
|
return mDictionaryStructureWithBufferPolicy.get();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/unigram_property.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Copies this unigram's properties into the Java-side output parameters.
//  - outCodePoints receives the first mCodePointCount code points.
//  - outFlags receives {isNotAWord, isBlacklisted, hasBigrams, hasShortcuts}, in that order.
//  - outProbability receives the probability in its first element.
//  - outHistoricalInfo receives {timestamp, level, count}, in that order.
//  - outShortcutTargets (ArrayList<int[]>) and outShortcutProbabilities (ArrayList<Integer>)
//    get one element appended per shortcut target.
void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
        jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
        jobject outShortcutTargets, jobject outShortcutProbabilities) const {
    env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints);
    jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
    env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
    env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
    int historicalInfo[] = {mTimestamp, mLevel, mCount};
    env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
            historicalInfo);

    jclass integerClass = env->FindClass("java/lang/Integer");
    jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
    jclass arrayListClass = env->FindClass("java/util/ArrayList");
    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
    const int shortcutTargetCount = mShortcutTargets.size();
    for (int i = 0; i < shortcutTargetCount; ++i) {
        const std::vector<int> &shortcutTarget = mShortcutTargets[i];
        const jsize shortcutTargetLength = static_cast<jsize>(shortcutTarget.size());
        jintArray shortcutTargetCodePointArray = env->NewIntArray(shortcutTargetLength);
        // Taking the address of element 0 of an empty vector is undefined behavior, so only
        // copy when there is something to copy.
        if (shortcutTargetLength > 0) {
            env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
                    shortcutTargetLength, &shortcutTarget[0]);
        }
        // ArrayList.add(Object) returns boolean, so it has to be invoked through
        // CallBooleanMethod; the JNI call family must match the method's return type.
        env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
        env->DeleteLocalRef(shortcutTargetCodePointArray);
        jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
                mShortcutProbabilities[i]);
        env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
        env->DeleteLocalRef(integerProbability);
    }
    env->DeleteLocalRef(integerClass);
    env->DeleteLocalRef(arrayListClass);
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,87 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_UNIGRAM_PROPERTY_H
|
||||||
|
#define LATINIME_UNIGRAM_PROPERTY_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "jni.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// This class is used for returning information belonging to a unigram to java side.
|
||||||
|
class UnigramProperty {
|
||||||
|
public:
|
||||||
|
// Invalid unigram.
|
||||||
|
UnigramProperty()
|
||||||
|
: mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false),
|
||||||
|
mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
|
||||||
|
mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(),
|
||||||
|
mShortcutProbabilities() {}
|
||||||
|
|
||||||
|
UnigramProperty(const UnigramProperty &unigramProperty)
|
||||||
|
: mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount),
|
||||||
|
mIsNotAWord(unigramProperty.mIsNotAWord),
|
||||||
|
mIsBlacklisted(unigramProperty.mIsBlacklisted),
|
||||||
|
mHasBigrams(unigramProperty.mHasBigrams),
|
||||||
|
mHasShortcuts(unigramProperty.mHasShortcuts),
|
||||||
|
mProbability(unigramProperty.mProbability),
|
||||||
|
mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel),
|
||||||
|
mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets),
|
||||||
|
mShortcutProbabilities(unigramProperty.mShortcutProbabilities) {
|
||||||
|
memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints));
|
||||||
|
}
|
||||||
|
|
||||||
|
UnigramProperty(const int *const codePoints, const int codePointCount,
|
||||||
|
const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
|
||||||
|
const bool hasShortcuts, const int probability, const int timestamp,
|
||||||
|
const int level, const int count,
|
||||||
|
const std::vector<std::vector<int> > *const shortcutTargets,
|
||||||
|
const std::vector<int> *const shortcutProbabilities)
|
||||||
|
: mCodePoints(), mCodePointCount(codePointCount),
|
||||||
|
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams),
|
||||||
|
mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp),
|
||||||
|
mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets),
|
||||||
|
mShortcutProbabilities(*shortcutProbabilities) {
|
||||||
|
memcpy(mCodePoints, codePoints, sizeof(mCodePoints));
|
||||||
|
}
|
||||||
|
|
||||||
|
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
|
||||||
|
jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
|
||||||
|
jobject outShortcutProbabilities) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
|
||||||
|
|
||||||
|
int mCodePoints[MAX_WORD_LENGTH];
|
||||||
|
int mCodePointCount;
|
||||||
|
bool mIsNotAWord;
|
||||||
|
bool mIsBlacklisted;
|
||||||
|
bool mHasBigrams;
|
||||||
|
bool mHasShortcuts;
|
||||||
|
int mProbability;
|
||||||
|
// Historical information
|
||||||
|
int mTimestamp;
|
||||||
|
int mLevel;
|
||||||
|
int mCount;
|
||||||
|
// Shortcut
|
||||||
|
std::vector<std::vector<int> > mShortcutTargets;
|
||||||
|
std::vector<int> mShortcutProbabilities;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_UNIGRAM_PROPERTY_H
|
|
@ -18,6 +18,7 @@
|
||||||
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
|
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/unigram_property.h"
|
||||||
#include "utils/exclusive_ownership_pointer.h"
|
#include "utils/exclusive_ownership_pointer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -90,6 +91,10 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
|
virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength) = 0;
|
const int maxResultLength) = 0;
|
||||||
|
|
||||||
|
// Used for testing.
|
||||||
|
virtual const UnigramProperty getUnigramProperty(const int *const codePonts,
|
||||||
|
const int codePointCount) const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryStructureWithBufferPolicy() {}
|
DictionaryStructureWithBufferPolicy() {}
|
||||||
|
|
||||||
|
|
|
@ -123,6 +123,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getUnigramProperty is not supported by this policy; a default-constructed
// UnigramProperty is returned for every word.
const UnigramProperty getUnigramProperty(const int *const codePoints,
        const int codePointCount) const {
    const UnigramProperty unsupportedProperty;
    return unsupportedProperty;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
||||||
|
|
||||||
|
|
|
@ -16,8 +16,11 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/core/dictionary/unigram_property.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
@ -290,4 +293,42 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the unigram property of the given word, including its historical information and
// all of its shortcut targets. When the word has no terminal PtNode in the dictionary, an
// error is logged and a default-constructed UnigramProperty is returned.
const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
        const int codePointCount) const {
    const int terminalPtNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
            false /* forceLowerCaseSearch */);
    if (NOT_A_DICT_POS == terminalPtNodePos) {
        AKLOGE("fetchUnigramProperty is called for invalid word.");
        return UnigramProperty();
    }
    const PtNodeParams nodeParams =
            mNodeReader.fetchNodeInfoInBufferFromPtNodePos(terminalPtNodePos);
    const ProbabilityEntry entry =
            mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
                    nodeParams.getTerminalId());

    // Collect every shortcut target together with its probability.
    std::vector<std::vector<int> > targets;
    std::vector<int> targetProbabilities;
    if (nodeParams.hasShortcutTargets()) {
        int targetBuffer[MAX_WORD_LENGTH];
        const ShortcutDictContent *const shortcutContent =
                mBuffers.get()->getShortcutDictContent();
        bool hasMoreEntries = true;
        int entryPos = getShortcutPositionOfPtNode(terminalPtNodePos);
        // At least one entry is always read; the read itself reports whether another one
        // follows and advances entryPos to it.
        do {
            int targetLength = 0;
            int targetProbability = NOT_A_PROBABILITY;
            shortcutContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetBuffer,
                    &targetLength, &targetProbability, &hasMoreEntries, &entryPos);
            targets.push_back(std::vector<int>(targetBuffer, targetBuffer + targetLength));
            targetProbabilities.push_back(targetProbability);
        } while (hasMoreEntries);
    }
    return UnigramProperty(nodeParams.getCodePoints(), nodeParams.getCodePointCount(),
            nodeParams.isNotAWord(), nodeParams.isBlacklisted(), nodeParams.hasBigrams(),
            nodeParams.hasShortcutTargets(), nodeParams.getProbability(),
            entry.getTimeStamp(), entry.getLevel(), entry.getCount(),
            &targets, &targetProbabilities);
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -107,6 +107,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength);
|
const int maxResultLength);
|
||||||
|
|
||||||
|
// Returns the unigram property of the word given as codePointCount code points in
// codePoints.
const UnigramProperty getUnigramProperty(
        const int *const codePoints, const int codePointCount) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ import android.util.Pair;
|
||||||
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
|
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
|
||||||
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
|
import com.android.inputmethod.latin.utils.UnigramProperty;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -824,4 +825,52 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
|
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGetUnigramProperties() {
|
||||||
|
testGetUnigramProperties(4 /* formatVersion */);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testGetUnigramProperties(final int formatVersion) {
|
||||||
|
final long seed = System.currentTimeMillis();
|
||||||
|
final Random random = new Random(seed);
|
||||||
|
final int ITERATION_COUNT = 1000;
|
||||||
|
final int codePointSetSize = 20;
|
||||||
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||||
|
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
final UnigramProperty invalidUnigramProperty =
|
||||||
|
binaryDictionary.getUnigramProperty("dummyWord");
|
||||||
|
assertFalse(invalidUnigramProperty.isValid());
|
||||||
|
|
||||||
|
for (int i = 0; i < ITERATION_COUNT; i++) {
|
||||||
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
|
final int unigramProbability = random.nextInt(0xFF);
|
||||||
|
final boolean isNotAWord = random.nextBoolean();
|
||||||
|
final boolean isBlacklisted = random.nextBoolean();
|
||||||
|
// TODO: Add tests for shortcut.
|
||||||
|
// TODO: Add tests for historical info.
|
||||||
|
binaryDictionary.addUnigramWord(word, unigramProbability,
|
||||||
|
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
|
||||||
|
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
|
final UnigramProperty unigramProperty =
|
||||||
|
binaryDictionary.getUnigramProperty(word);
|
||||||
|
assertEquals(word, unigramProperty.mCodePoints);
|
||||||
|
assertTrue(unigramProperty.isValid());
|
||||||
|
assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
|
||||||
|
assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
|
||||||
|
assertEquals(false, unigramProperty.mHasBigrams);
|
||||||
|
assertEquals(false, unigramProperty.mHasShortcuts);
|
||||||
|
assertEquals(unigramProbability, unigramProperty.mProbability);
|
||||||
|
assertTrue(unigramProperty.mShortcutTargets.isEmpty());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue