am fca049a4: Merge "Extend jni method to dump ngram entries."
* commit 'fca049a4b01a92f2f59d790898f8aedb13ad7e7d': Extend jni method to dump ngram entries.
This commit is contained in:
commit
ef37d49e1b
3 changed files with 33 additions and 20 deletions
|
@ -70,7 +70,7 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
|
private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
|
||||||
private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
|
private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
|
||||||
private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1;
|
private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1;
|
||||||
private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2;
|
private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
|
||||||
private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
|
private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
|
||||||
private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;
|
private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;
|
||||||
|
|
||||||
|
@ -179,9 +179,10 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
boolean[] isBeginningOfSentenceArray, int[] word);
|
boolean[] isBeginningOfSentenceArray, int[] word);
|
||||||
private static native void getWordPropertyNative(long dict, int[] word,
|
private static native void getWordPropertyNative(long dict, int[] word,
|
||||||
boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
|
boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
|
||||||
int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets,
|
int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray,
|
||||||
ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets,
|
ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
|
||||||
ArrayList<Integer> outShortcutProbabilities);
|
ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo,
|
||||||
|
ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
|
||||||
private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
|
private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
|
||||||
boolean[] outIsBeginningOfSentence);
|
boolean[] outIsBeginningOfSentence);
|
||||||
private static native void getSuggestionsNative(long dict, long proximityInfo,
|
private static native void getSuggestionsNative(long dict, long proximityInfo,
|
||||||
|
@ -388,20 +389,25 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
|
final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
|
||||||
final int[] outProbabilityInfo =
|
final int[] outProbabilityInfo =
|
||||||
new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
|
new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
|
||||||
final ArrayList<int[]> outBigramTargets = new ArrayList<>();
|
final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>();
|
||||||
final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>();
|
final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray =
|
||||||
|
new ArrayList<>();
|
||||||
|
final ArrayList<int[]> outNgramTargets = new ArrayList<>();
|
||||||
|
final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>();
|
||||||
final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
|
final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
|
||||||
final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
|
final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
|
||||||
getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
|
getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
|
||||||
outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo,
|
outFlags, outProbabilityInfo, outNgramPrevWordsArray,
|
||||||
outShortcutTargets, outShortcutProbabilities);
|
outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets,
|
||||||
|
outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
|
||||||
return new WordProperty(codePoints,
|
return new WordProperty(codePoints,
|
||||||
outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
|
outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
|
||||||
outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
|
outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
|
||||||
outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX],
|
outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
|
||||||
outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
|
outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
|
||||||
outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
|
outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
|
||||||
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
|
outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
|
||||||
|
outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
|
||||||
outShortcutProbabilities);
|
outShortcutProbabilities);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,8 @@ import com.android.inputmethod.latin.utils.StringUtils;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class for a word with a probability.
|
* Utility class for a word with a probability.
|
||||||
*
|
*
|
||||||
|
@ -49,7 +51,7 @@ public final class WordProperty implements Comparable<WordProperty> {
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
|
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
|
||||||
final ArrayList<WeightedString> shortcutTargets,
|
final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<WeightedString> bigrams,
|
@Nullable final ArrayList<WeightedString> bigrams,
|
||||||
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
||||||
mWord = word;
|
mWord = word;
|
||||||
mProbabilityInfo = probabilityInfo;
|
mProbabilityInfo = probabilityInfo;
|
||||||
|
@ -85,7 +87,9 @@ public final class WordProperty implements Comparable<WordProperty> {
|
||||||
public WordProperty(final int[] codePoints, final boolean isNotAWord,
|
public WordProperty(final int[] codePoints, final boolean isNotAWord,
|
||||||
final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts,
|
final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts,
|
||||||
final boolean isBeginningOfSentence, final int[] probabilityInfo,
|
final boolean isBeginningOfSentence, final int[] probabilityInfo,
|
||||||
final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
|
final ArrayList<int[][]> ngramPrevWordsArray,
|
||||||
|
final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
|
||||||
|
final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo,
|
||||||
final ArrayList<int[]> shortcutTargets,
|
final ArrayList<int[]> shortcutTargets,
|
||||||
final ArrayList<Integer> shortcutProbabilities) {
|
final ArrayList<Integer> shortcutProbabilities) {
|
||||||
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
|
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
|
||||||
|
@ -98,15 +102,15 @@ public final class WordProperty implements Comparable<WordProperty> {
|
||||||
mHasShortcuts = hasShortcuts;
|
mHasShortcuts = hasShortcuts;
|
||||||
mHasNgrams = hasBigram;
|
mHasNgrams = hasBigram;
|
||||||
|
|
||||||
final int relatedNgramCount = bigramTargets.size();
|
final int relatedNgramCount = ngramTargets.size();
|
||||||
final WordInfo currentWordInfo =
|
final WordInfo currentWordInfo =
|
||||||
mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
|
mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
|
||||||
final NgramContext ngramContext = new NgramContext(currentWordInfo);
|
final NgramContext ngramContext = new NgramContext(currentWordInfo);
|
||||||
for (int i = 0; i < relatedNgramCount; i++) {
|
for (int i = 0; i < relatedNgramCount; i++) {
|
||||||
final String ngramTargetString =
|
final String ngramTargetString =
|
||||||
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
|
StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
|
||||||
final WeightedString ngramTarget = new WeightedString(ngramTargetString,
|
final WeightedString ngramTarget = new WeightedString(ngramTargetString,
|
||||||
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)));
|
createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
|
||||||
// TODO: Support n-gram.
|
// TODO: Support n-gram.
|
||||||
ngrams.add(new NgramProperty(ngramTarget, ngramContext));
|
ngrams.add(new NgramProperty(ngramTarget, ngramContext));
|
||||||
}
|
}
|
||||||
|
@ -180,7 +184,8 @@ public final class WordProperty implements Comparable<WordProperty> {
|
||||||
&& mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
|
&& mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
|
// TODO: Have a utility method like java.util.Objects.equals.
|
||||||
|
private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
|
||||||
if (null == a) {
|
if (null == a) {
|
||||||
return null == b;
|
return null == b;
|
||||||
}
|
}
|
||||||
|
|
|
@ -327,8 +327,9 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
||||||
jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
|
jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
|
||||||
jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
|
jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */,
|
||||||
jobject outBigramProbabilityInfo, jobject outShortcutTargets,
|
jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets,
|
||||||
|
jobject outNgramProbabilityInfo, jobject outShortcutTargets,
|
||||||
jobject outShortcutProbabilities) {
|
jobject outShortcutProbabilities) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) return;
|
if (!dictionary) return;
|
||||||
|
@ -351,7 +352,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
|
||||||
const WordProperty wordProperty = dictionary->getWordProperty(
|
const WordProperty wordProperty = dictionary->getWordProperty(
|
||||||
CodePointArrayView(wordCodePoints, codePointCount));
|
CodePointArrayView(wordCodePoints, codePointCount));
|
||||||
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
|
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
|
||||||
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
|
outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
|
||||||
outShortcutProbabilities);
|
outShortcutProbabilities);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -718,7 +719,8 @@ static const JNINativeMethod sMethods[] = {
|
||||||
{
|
{
|
||||||
const_cast<char *>("getWordPropertyNative"),
|
const_cast<char *>("getWordPropertyNative"),
|
||||||
const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
|
const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
|
||||||
"Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
|
"Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;"
|
||||||
|
"Ljava/util/ArrayList;)V"),
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue