Merge "Add a jni method to fetch unigram information."

This commit is contained in:
Keisuke Kuroyanagi 2013-12-06 08:53:21 +00:00 committed by Android (Google) Code Review
commit fc16c320c6
13 changed files with 403 additions and 2 deletions

View file

@ -26,6 +26,7 @@ import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils; import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.UnigramProperty;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
@ -59,6 +60,19 @@ public final class BinaryDictionary extends Dictionary {
public static final int NOT_A_VALID_TIMESTAMP = -1; public static final int NOT_A_VALID_TIMESTAMP = -1;
// Format to get unigram flags from native side via getUnigramPropertyNative().
// The flag array has FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT elements and each *_INDEX constant
// below is the slot of one flag. The native side has to write the flags in this same order.
private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT = 4;
private static final int FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
private static final int FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX = 1;
private static final int FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX = 2;
private static final int FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
// Format to get unigram historical info from native side via getUnigramPropertyNative().
// The historical info array has FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT elements;
// the *_INDEX constants below are the slots of the timestamp, level and count values.
private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
private static final int FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX = 0;
private static final int FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX = 1;
private static final int FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX = 2;
private long mNativeDict; private long mNativeDict;
private final Locale mLocale; private final Locale mLocale;
private final long mDictSize; private final long mDictSize;
@ -128,6 +142,10 @@ public final class BinaryDictionary extends Dictionary {
private static native int getFormatVersionNative(long dict); private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word); private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
// Looks up word (an array of code points) in the native dictionary identified by dict and
// reports its unigram attributes through the out parameters:
//   outCodePoints            - code points stored for the unigram.
//   outFlags                 - flags, laid out per FORMAT_UNIGRAM_PROPERTY_*_INDEX.
//   outProbability           - single element array receiving the probability.
//   outHistoricalInfo        - timestamp, level and count, laid out per
//                              FORMAT_UNIGRAM_PROPERTY_{TIMESTAMP,LEVEL,COUNT}_INDEX.
//   outShortcutTargets       - one code point array is appended per shortcut target.
//   outShortcutProbabilities - one probability is appended per shortcut target.
// The arrays must be allocated by the caller; nothing is returned.
private static native void getUnigramPropertyNative(long dict, int[] word,
int[] outCodePoints, boolean[] outFlags, int[] outProbability,
int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
ArrayList<Integer> outShortcutProbabilities);
private static native int getSuggestionsNative(long dict, long proximityInfo, private static native int getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@ -285,6 +303,32 @@ public final class BinaryDictionary extends Dictionary {
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
} }
// Fetches the unigram attributes of the given word from the native dictionary.
// Returns null when word is null or empty. Otherwise a UnigramProperty is always returned;
// callers can use UnigramProperty.isValid() to tell whether the word was actually found.
@UsedForTesting
public UnigramProperty getUnigramProperty(final String word) {
    if (TextUtils.isEmpty(word)) {
        return null;
    }
    // Buffers that the native side fills in. Their sizes and layouts are defined by the
    // FORMAT_UNIGRAM_PROPERTY_* constants.
    final ArrayList<Integer> shortcutProbabilities = CollectionUtils.newArrayList();
    final ArrayList<int[]> shortcutTargets = CollectionUtils.newArrayList();
    final int[] historicalInfo = new int[FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
    final int[] probability = new int[1];
    final boolean[] flags = new boolean[FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT];
    final int[] nativeCodePoints = new int[MAX_WORD_LENGTH];
    final int[] wordCodePoints = StringUtils.toCodePointArray(word);

    getUnigramPropertyNative(mNativeDict, wordCodePoints, nativeCodePoints, flags, probability,
            historicalInfo, shortcutTargets, shortcutProbabilities);

    // Unpack the fixed-layout arrays into named values.
    final boolean isNotAWord = flags[FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX];
    final boolean isBlacklisted = flags[FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX];
    final boolean hasBigrams = flags[FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX];
    final boolean hasShortcuts = flags[FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX];
    final int timestamp = historicalInfo[FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX];
    final int level = historicalInfo[FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX];
    final int count = historicalInfo[FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX];

    // NOTE(review): the code points written by the native side are not read; the property is
    // built from the code points of the queried word.
    return new UnigramProperty(wordCodePoints, isNotAWord, isBlacklisted, hasBigrams,
            hasShortcuts, probability[0], timestamp, level, count, shortcutTargets,
            shortcutProbabilities);
}
// Add a unigram entry to binary dictionary with unigram attributes in native code. // Add a unigram entry to binary dictionary with unigram attributes in native code.
public void addUnigramWord(final String word, final int probability, public void addUnigramWord(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,

View file

@ -0,0 +1,82 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.utils;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.util.ArrayList;
// Holds the information that belongs to a single unigram. Some of the attributes, such as the
// historical information, are detailed ones that should only be inspected for testing purposes.
@UsedForTesting
public class UnigramProperty {
    public final String mCodePoints;
    public final boolean mIsNotAWord;
    public final boolean mIsBlacklisted;
    public final boolean mHasBigrams;
    public final boolean mHasShortcuts;
    public final int mProbability;
    // mTimestamp, mLevel and mCount are historical info. Their values depend on the
    // implementation in native code; thus, nothing except tests may use them or make any
    // assumptions about them.
    public final int mTimestamp;
    public final int mLevel;
    public final int mCount;
    public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();

    // Returns the number of code points that precede the first 0 in the array, or the array
    // length when the array contains no 0.
    private static int getCodePointCount(final int[] codePoints) {
        int length = 0;
        while (length < codePoints.length && codePoints[length] != 0) {
            length++;
        }
        return length;
    }

    // Converts the code points that precede the first 0 (if any) into a String.
    private static String toStringUntilTerminator(final int[] codePoints) {
        return new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
    }

    // This represents invalid unigram when the probability is BinaryDictionary.NOT_A_PROBABILITY.
    // shortcutTargets and shortcutProbabilities are parallel lists: the i-th probability belongs
    // to the i-th target.
    public UnigramProperty(final int[] codePoints, final boolean isNotAWord,
            final boolean isBlacklisted, final boolean hasBigram,
            final boolean hasShortcuts, final int probability, final int timestamp,
            final int level, final int count, final ArrayList<int[]> shortcutTargets,
            final ArrayList<Integer> shortcutProbabilities) {
        mProbability = probability;
        mCodePoints = toStringUntilTerminator(codePoints);
        // Flags.
        mIsNotAWord = isNotAWord;
        mIsBlacklisted = isBlacklisted;
        mHasBigrams = hasBigram;
        mHasShortcuts = hasShortcuts;
        // Historical info.
        mTimestamp = timestamp;
        mLevel = level;
        mCount = count;
        // Pair every shortcut target with the probability stored at the same index.
        int index = 0;
        for (final int[] targetCodePoints : shortcutTargets) {
            final String target = toStringUntilTerminator(targetCodePoints);
            mShortcutTargets.add(new WeightedString(target, shortcutProbabilities.get(index)));
            index++;
        }
    }

    // Returns whether this property describes a unigram that has a probability.
    @UsedForTesting
    public boolean isValid() {
        return BinaryDictionary.NOT_A_PROBABILITY != mProbability;
    }
}

View file

@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
dictionary.cpp \ dictionary.cpp \
digraph_utils.cpp \ digraph_utils.cpp \
error_type_utils.cpp \ error_type_utils.cpp \
multi_bigram_map.cpp) \ multi_bigram_map.cpp \
unigram_property.cpp) \
$(addprefix suggest/core/layout/, \ $(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \ additional_proximity_chars.cpp \
proximity_info.cpp \ proximity_info.cpp \

View file

@ -24,6 +24,7 @@
#include "jni.h" #include "jni.h"
#include "jni_common.h" #include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/suggest_options.h" #include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
@ -258,6 +259,21 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
word1Length); word1Length);
} }
// JNI entry point registered as getUnigramPropertyNative. Copies the queried word out of the
// Java array, asks the dictionary for the word's unigram property and writes that property
// into the Java-side output arrays and lists. Does nothing when the dictionary handle is null.
static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz,
        jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
        jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
        jobject outShortcutProbabilities) {
    Dictionary *const nativeDictionary = reinterpret_cast<Dictionary *>(dict);
    if (!nativeDictionary) {
        return;
    }
    // NOTE(review): the stack buffer is sized by the length of the Java array; confirm that
    // callers bound the word length.
    const jsize codePointCount = env->GetArrayLength(word);
    int codePoints[codePointCount];
    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
    nativeDictionary->getUnigramProperty(codePoints, codePointCount).outputProperties(
            env, outCodePoints, outFlags, outProbability, outHistoricalInfo,
            outShortcutTargets, outShortcutProbabilities);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
jintArray before, jintArray after, jint score) { jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before); jsize beforeLength = env->GetArrayLength(before);
@ -333,7 +349,6 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length); word1Length);
} }
// Returns how many language model params are processed. // Returns how many language model params are processed.
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz, static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray languageModelParams, jint startIndex) { jlong dict, jobjectArray languageModelParams, jint startIndex) {
@ -493,6 +508,11 @@ static const JNINativeMethod sMethods[] = {
const_cast<char *>("(J[I[I)I"), const_cast<char *>("(J[I[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
}, },
{
const_cast<char *>("getUnigramPropertyNative"),
const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty)
},
{ {
const_cast<char *>("calcNormalizedScoreNative"), const_cast<char *>("calcNormalizedScoreNative"),
const_cast<char *>("([I[II)F"), const_cast<char *>("([I[II)F"),

View file

@ -143,6 +143,13 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
maxResultLength); maxResultLength);
} }
// Returns the unigram property of the word given as code points by delegating the lookup to
// the dictionary structure policy.
const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
        const int codePointCount) {
    // Update the time keeper before the lookup; presumably time-dependent values are then
    // evaluated against the current time -- TODO confirm.
    TimeKeeper::setCurrentTime();
    const DictionaryStructureWithBufferPolicy *const structurePolicy =
            mDictionaryStructureWithBufferPolicy.get();
    return structurePolicy->getUnigramProperty(codePoints, codePointCount);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const { void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];

View file

@ -22,6 +22,7 @@
#include "defines.h" #include "defines.h"
#include "jni.h" #include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/suggest_interface.h" #include "suggest/core/suggest_interface.h"
@ -33,6 +34,7 @@ class DictionaryStructureWithBufferPolicy;
class DicTraverseSession; class DicTraverseSession;
class ProximityInfo; class ProximityInfo;
class SuggestOptions; class SuggestOptions;
class UnigramProperty;
class Dictionary { class Dictionary {
public: public:
@ -92,6 +94,8 @@ class Dictionary {
void getProperty(const char *const query, const int queryLength, char *const outResult, void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength); const int maxResultLength);
const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get(); return mDictionaryStructureWithBufferPolicy.get();
} }

View file

@ -0,0 +1,52 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/core/dictionary/unigram_property.h"
namespace latinime {
// Writes this unigram property into the Java-side output containers.
//   outCodePoints            - receives the first mCodePointCount code points.
//   outFlags                 - receives {isNotAWord, isBlacklisted, hasBigrams, hasShortcuts}.
//   outProbability           - receives the probability in its first element.
//   outHistoricalInfo        - receives {timestamp, level, count}.
//   outShortcutTargets       - java.util.ArrayList; one int[] is appended per shortcut target.
//   outShortcutProbabilities - java.util.ArrayList; one Integer is appended per shortcut target.
void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
        jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
        jobject outShortcutTargets, jobject outShortcutProbabilities) const {
    env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints);
    jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
    env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
    env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
    int historicalInfo[] = {mTimestamp, mLevel, mCount};
    env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
            historicalInfo);

    jclass integerClass = env->FindClass("java/lang/Integer");
    jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
    jclass arrayListClass = env->FindClass("java/util/ArrayList");
    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
    const int shortcutTargetCount = mShortcutTargets.size();
    for (int i = 0; i < shortcutTargetCount; ++i) {
        const jsize targetLength = static_cast<jsize>(mShortcutTargets[i].size());
        jintArray shortcutTargetCodePointArray = env->NewIntArray(targetLength);
        // Taking the address of the first element is only valid for a non-empty vector.
        if (targetLength > 0) {
            env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
                    targetLength, &mShortcutTargets[i][0]);
        }
        // ArrayList.add(Object) returns a boolean, so it has to be invoked through
        // CallBooleanMethod; the JNI call family must match the method's return type.
        env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
        env->DeleteLocalRef(shortcutTargetCodePointArray);
        jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
                mShortcutProbabilities[i]);
        env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
        env->DeleteLocalRef(integerProbability);
    }
    env->DeleteLocalRef(integerClass);
    env->DeleteLocalRef(arrayListClass);
}
} // namespace latinime

View file

@ -0,0 +1,87 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_UNIGRAM_PROPERTY_H
#define LATINIME_UNIGRAM_PROPERTY_H
#include <vector>
#include "defines.h"
#include "jni.h"
namespace latinime {
// This class is used for returning information belonging to a unigram to java side.
// It is a value type: it owns copies of the code points and of the shortcut information, so an
// instance can be returned by value from the dictionary structure policies.
class UnigramProperty {
 public:
    // Invalid unigram.
    UnigramProperty()
            : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false),
              mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
              mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(),
              mShortcutProbabilities() {}

    UnigramProperty(const UnigramProperty &unigramProperty)
            : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount),
              mIsNotAWord(unigramProperty.mIsNotAWord),
              mIsBlacklisted(unigramProperty.mIsBlacklisted),
              mHasBigrams(unigramProperty.mHasBigrams),
              mHasShortcuts(unigramProperty.mHasShortcuts),
              mProbability(unigramProperty.mProbability),
              mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel),
              mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets),
              mShortcutProbabilities(unigramProperty.mShortcutProbabilities) {
        // Both sides are full MAX_WORD_LENGTH arrays, so copying the whole array is safe.
        memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints));
    }

    // codePoints has to point to at least codePointCount elements. codePointCount is clamped
    // to [0, MAX_WORD_LENGTH]; the code points beyond the count are left zeroed.
    // shortcutTargets and shortcutProbabilities must not be null; they are copied.
    UnigramProperty(const int *const codePoints, const int codePointCount,
            const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
            const bool hasShortcuts, const int probability, const int timestamp,
            const int level, const int count,
            const std::vector<std::vector<int> > *const shortcutTargets,
            const std::vector<int> *const shortcutProbabilities)
            : mCodePoints(), mCodePointCount(clampCodePointCount(codePointCount)),
              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams),
              mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp),
              mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets),
              mShortcutProbabilities(*shortcutProbabilities) {
        // Copy only as many elements as the caller declared. Copying sizeof(mCodePoints)
        // bytes would read past the end of any source buffer shorter than MAX_WORD_LENGTH.
        if (mCodePointCount > 0) {
            memcpy(mCodePoints, codePoints, sizeof(mCodePoints[0]) * mCodePointCount);
        }
    }

    // Writes the content of this property into the given Java arrays and lists.
    void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
            jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
            jobject outShortcutProbabilities) const;

 private:
    DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);

    // Limits a caller supplied code point count to what mCodePoints can hold.
    static int clampCodePointCount(const int codePointCount) {
        if (codePointCount < 0) {
            return 0;
        }
        if (codePointCount > MAX_WORD_LENGTH) {
            return MAX_WORD_LENGTH;
        }
        return codePointCount;
    }

    int mCodePoints[MAX_WORD_LENGTH];
    int mCodePointCount;
    bool mIsNotAWord;
    bool mIsBlacklisted;
    bool mHasBigrams;
    bool mHasShortcuts;
    int mProbability;
    // Historical information
    int mTimestamp;
    int mLevel;
    int mCount;
    // Shortcut
    std::vector<std::vector<int> > mShortcutTargets;
    std::vector<int> mShortcutProbabilities;
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_PROPERTY_H

View file

@ -18,6 +18,7 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H #define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "utils/exclusive_ownership_pointer.h" #include "utils/exclusive_ownership_pointer.h"
namespace latinime { namespace latinime {
@ -90,6 +91,10 @@ class DictionaryStructureWithBufferPolicy {
virtual void getProperty(const char *const query, const int queryLength, char *const outResult, virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) = 0; const int maxResultLength) = 0;
// Used for testing.
// Returns the unigram property of the word given as codePointCount code points.
virtual const UnigramProperty getUnigramProperty(const int *const codePoints,
        const int codePointCount) const = 0;
protected: protected:
DictionaryStructureWithBufferPolicy() {} DictionaryStructureWithBufferPolicy() {}

View file

@ -123,6 +123,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
} }
} }
// This policy does not implement unigram property lookup: both arguments are ignored and a
// default-constructed UnigramProperty is returned for every word.
const UnigramProperty getUnigramProperty(const int *const codePoints,
const int codePointCount) const {
// getUnigramProperty is not supported.
return UnigramProperty();
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);

View file

@ -16,8 +16,11 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
#include <vector>
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@ -290,4 +293,42 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
} }
} }
// Builds the unigram property of the word given as code points: its flags, probability,
// historical information and shortcut targets. Returns a default-constructed (invalid)
// UnigramProperty when the word has no terminal PtNode in this dictionary.
const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
        const int codePointCount) const {
    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
            false /* forceLowerCaseSearch */);
    if (ptNodePos == NOT_A_DICT_POS) {
        // The message names this method so that the log can be traced back to it.
        AKLOGE("getUnigramProperty is called for invalid word.");
        return UnigramProperty();
    }
    const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
    const ProbabilityEntry probabilityEntry =
            mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
                    ptNodeParams.getTerminalId());
    // Fetch shortcut information.
    std::vector<std::vector<int> > shortcutTargets;
    std::vector<int> shortcutProbabilities;
    if (ptNodeParams.hasShortcutTargets()) {
        int shortcutTarget[MAX_WORD_LENGTH];
        const ShortcutDictContent *const shortcutDictContent =
                mBuffers.get()->getShortcutDictContent();
        bool hasNext = true;
        int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
        // Each call reads one entry and updates hasNext and shortcutPos for the next one.
        while (hasNext) {
            int shortcutTargetLength = 0;
            int shortcutProbability = NOT_A_PROBABILITY;
            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
            std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
            shortcutTargets.push_back(target);
            shortcutProbabilities.push_back(shortcutProbability);
        }
    }
    return UnigramProperty(ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
            ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
            probabilityEntry.getTimeStamp(), probabilityEntry.getLevel(),
            probabilityEntry.getCount(), &shortcutTargets, &shortcutProbabilities);
}
} // namespace latinime } // namespace latinime

View file

@ -107,6 +107,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
void getProperty(const char *const query, const int queryLength, char *const outResult, void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength); const int maxResultLength);
const UnigramProperty getUnigramProperty(const int *const codePoints,
const int codePointCount) const;
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);

View file

@ -24,6 +24,7 @@ import android.util.Pair;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam; import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.utils.UnigramProperty;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -824,4 +825,52 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1)); assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
} }
} }
// Runs the unigram property test with formatVersion 4.
public void testGetUnigramProperties() {
testGetUnigramProperties(4 /* formatVersion */);
}
// Adds randomly generated unigrams to an empty updatable dictionary of the given format
// version and checks that getUnigramProperty() reports back what was added. A word that was
// never added has to yield an invalid property.
private void testGetUnigramProperties(final int formatVersion) {
    final int wordCount = 1000;
    final int alphabetSize = 20;
    final Random rng = new Random(System.currentTimeMillis());
    final int[] alphabet = CodePointUtils.generateCodePointSet(alphabetSize, rng);

    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    }
    final BinaryDictionary dictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

    // Nothing has been added yet, so any lookup must produce an invalid property.
    final UnigramProperty missingWordProperty = dictionary.getUnigramProperty("dummyWord");
    assertFalse(missingWordProperty.isValid());

    for (int remaining = wordCount; remaining > 0; remaining--) {
        // The order of the random draws is part of the test; keep it as is.
        final String word = CodePointUtils.generateWord(rng, alphabet);
        final int probability = rng.nextInt(0xFF);
        final boolean notAWord = rng.nextBoolean();
        final boolean blacklisted = rng.nextBoolean();
        // TODO: Add tests for shortcut.
        // TODO: Add tests for historical info.
        dictionary.addUnigramWord(word, probability,
                null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
                notAWord, blacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);

        final UnigramProperty property = dictionary.getUnigramProperty(word);
        assertEquals(word, property.mCodePoints);
        assertTrue(property.isValid());
        assertEquals(notAWord, property.mIsNotAWord);
        assertEquals(blacklisted, property.mIsBlacklisted);
        assertEquals(false, property.mHasBigrams);
        assertEquals(false, property.mHasShortcuts);
        assertEquals(probability, property.mProbability);
        assertTrue(property.mShortcutTargets.isEmpty());
    }
}
} }