Rename UnigramProperty to WordProperty.

Bug: 12810574
Change-Id: If5ddd803948aaf6e491ddcbaa5436fb3af3f7257
This commit is contained in:
Keisuke Kuroyanagi 2014-01-31 11:06:42 +09:00
parent 5f7f6a1615
commit 080a35e959
13 changed files with 71 additions and 73 deletions

View file

@ -27,7 +27,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.UnigramProperty;
import com.android.inputmethod.latin.utils.WordProperty;
import java.io.File;
import java.util.ArrayList;
@ -61,18 +61,18 @@ public final class BinaryDictionary extends Dictionary {
public static final int NOT_A_VALID_TIMESTAMP = -1;
// Format to get unigram flags from native side via getUnigramPropertyNative().
private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT = 4;
private static final int FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
private static final int FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX = 1;
private static final int FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX = 2;
private static final int FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
// Format to get unigram flags from native side via getWordPropertyNative().
private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4;
private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1;
private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2;
private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
// Format to get unigram historical info from native side via getUnigramPropertyNative().
private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
private static final int FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX = 0;
private static final int FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX = 1;
private static final int FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX = 2;
// Format to get unigram historical info from native side via getWordPropertyNative().
private static final int FORMAT_WORD_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
private static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 0;
private static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 1;
private static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 2;
private long mNativeDict;
private final Locale mLocale;
@ -143,7 +143,7 @@ public final class BinaryDictionary extends Dictionary {
private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
private static native void getUnigramPropertyNative(long dict, int[] word,
private static native void getWordPropertyNative(long dict, int[] word,
int[] outCodePoints, boolean[] outFlags, int[] outProbability,
int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
ArrayList<Integer> outShortcutProbabilities);
@ -306,28 +306,28 @@ public final class BinaryDictionary extends Dictionary {
}
@UsedForTesting
public UnigramProperty getUnigramProperty(final String word) {
public WordProperty getWordProperty(final String word) {
if (TextUtils.isEmpty(word)) {
return null;
}
final int[] codePoints = StringUtils.toCodePointArray(word);
final int[] outCodePoints = new int[MAX_WORD_LENGTH];
final boolean[] outFlags = new boolean[FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT];
final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
final int[] outProbability = new int[1];
final int[] outHistoricalInfo =
new int[FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
new int[FORMAT_WORD_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
getUnigramPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
return new UnigramProperty(codePoints,
outFlags[FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX],
outFlags[FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX],
outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX],
outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX],
outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX],
outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX],
return new WordProperty(codePoints,
outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
outHistoricalInfo[FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
outHistoricalInfo[FORMAT_WORD_PROPERTY_LEVEL_INDEX],
outHistoricalInfo[FORMAT_WORD_PROPERTY_COUNT_INDEX],
outShortcutTargets, outShortcutProbabilities);
}

View file

@ -26,7 +26,7 @@ import java.util.ArrayList;
// This has information that belongs to a unigram. This class has some detailed attributes such as
// historical information but they have to be checked only for testing purposes.
@UsedForTesting
public class UnigramProperty {
public class WordProperty {
public final String mCodePoints;
public final boolean mIsNotAWord;
public final boolean mIsBlacklisted;
@ -50,8 +50,8 @@ public class UnigramProperty {
return codePoints.length;
}
// This represents invalid unigram when the probability is BinaryDictionary.NOT_A_PROBABILITY.
public UnigramProperty(final int[] codePoints, final boolean isNotAWord,
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
public WordProperty(final int[] codePoints, final boolean isNotAWord,
final boolean isBlacklisted, final boolean hasBigram,
final boolean hasShortcuts, final int probability, final int timestamp,
final int level, final int count, final ArrayList<int[]> shortcutTargets,

View file

@ -32,7 +32,7 @@ LATIN_IME_CORE_SRC_FILES := \
error_type_utils.cpp \
multi_bigram_map.cpp \
suggestions_output_utils.cpp \
unigram_property.cpp) \
word_property.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \
proximity_info.cpp \

View file

@ -24,7 +24,7 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
@ -260,7 +260,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
word1Length);
}
static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz,
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
jobject outShortcutProbabilities) {
@ -269,9 +269,8 @@ static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass cla
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const UnigramProperty unigramProperty = dictionary->getUnigramProperty(
wordCodePoints, wordLength);
unigramProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
}
@ -521,9 +520,9 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
const_cast<char *>("getUnigramPropertyNative"),
const_cast<char *>("getWordPropertyNative"),
const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty)
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
const_cast<char *>("calcNormalizedScoreNative"),

View file

@ -143,10 +143,10 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
maxResultLength);
}
const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
const WordProperty Dictionary::getWordProperty(const int *const codePoints,
const int codePointCount) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy.get()->getUnigramProperty(
return mDictionaryStructureWithBufferPolicy.get()->getWordProperty(
codePoints, codePointCount);
}

View file

@ -22,7 +22,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/suggest_interface.h"
@ -34,7 +34,7 @@ class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
class SuggestOptions;
class UnigramProperty;
class WordProperty;
class Dictionary {
public:
@ -94,7 +94,7 @@ class Dictionary {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();

View file

@ -14,11 +14,11 @@
* limitations under the License.
*/
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/dictionary/word_property.h"
namespace latinime {
void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
jobject outShortcutTargets, jobject outShortcutProbabilities) const {
env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);

View file

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef LATINIME_UNIGRAM_PROPERTY_H
#define LATINIME_UNIGRAM_PROPERTY_H
#ifndef LATINIME_WORD_PROPERTY_H
#define LATINIME_WORD_PROPERTY_H
#include <cstring>
#include <vector>
@ -25,16 +25,17 @@
namespace latinime {
// This class is used for returning information belonging to a unigram to java side.
class UnigramProperty {
// This class is used for returning information belonging to a word to java side.
class WordProperty {
public:
// Invalid unigram.
UnigramProperty()
// TODO: Add bigram information.
// Invalid word.
WordProperty()
: mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false),
mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {}
UnigramProperty(const std::vector<int> *const codePoints,
WordProperty(const std::vector<int> *const codePoints,
const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
const bool hasShortcuts, const int probability, const int timestamp,
const int level, const int count,
@ -50,7 +51,7 @@ class UnigramProperty {
jobject outShortcutProbabilities) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
std::vector<int> mCodePoints;
bool mIsNotAWord;
@ -67,4 +68,4 @@ class UnigramProperty {
std::vector<int> mShortcutProbabilities;
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_PROPERTY_H
#endif // LATINIME_WORD_PROPERTY_H

View file

@ -18,7 +18,7 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/dictionary/word_property.h"
#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@ -92,7 +92,7 @@ class DictionaryStructureWithBufferPolicy {
const int maxResultLength) = 0;
// Used for testing.
virtual const UnigramProperty getUnigramProperty(const int *const codePonts,
virtual const WordProperty getWordProperty(const int *const codePonts,
const int codePointCount) const = 0;
protected:

View file

@ -123,10 +123,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
}
const UnigramProperty getUnigramProperty(const int *const codePoints,
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const {
// getUnigramProperty is not supported.
return UnigramProperty();
// getWordProperty is not supported.
return WordProperty();
}
private:

View file

@ -20,7 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@ -317,13 +317,13 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
}
}
const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
const int codePointCount) const {
const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
false /* forceLowerCaseSearch */);
if (ptNodePos == NOT_A_DICT_POS) {
AKLOGE("fetchUnigramProperty is called for invalid word.");
return UnigramProperty();
AKLOGE("getWordProperty is called for invalid word.");
return WordProperty();
}
const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
@ -351,7 +351,7 @@ const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *cons
shortcutProbabilities.push_back(shortcutProbability);
}
}
return UnigramProperty(&codePointVector, ptNodeParams.isNotAWord(),
return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),

View file

@ -106,7 +106,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
const UnigramProperty getUnigramProperty(const int *const codePoints,
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
private:

View file

@ -26,7 +26,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.UnigramProperty;
import com.android.inputmethod.latin.utils.WordProperty;
import java.io.File;
import java.io.IOException;
@ -892,8 +892,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final UnigramProperty invalidUnigramProperty =
binaryDictionary.getUnigramProperty("dummyWord");
final WordProperty invalidUnigramProperty = binaryDictionary.getWordProperty("dummyWord");
assertFalse(invalidUnigramProperty.isValid());
for (int i = 0; i < ITERATION_COUNT; i++) {
@ -905,8 +904,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramWord(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
final UnigramProperty unigramProperty =
binaryDictionary.getUnigramProperty(word);
final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
assertEquals(word, unigramProperty.mCodePoints);
assertTrue(unigramProperty.isValid());
assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
@ -938,7 +936,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
UnigramProperty unigramProperty = binaryDictionary.getUnigramProperty("aaa");
WordProperty unigramProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, unigramProperty.mShortcutTargets.size());
assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
assertEquals(shortcutProbability, unigramProperty.mShortcutTargets.get(0).mFrequency);
@ -946,7 +944,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
unigramProperty = binaryDictionary.getUnigramProperty("aaa");
unigramProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, unigramProperty.mShortcutTargets.size());
assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability,
@ -957,7 +955,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
unigramProperty = binaryDictionary.getUnigramProperty("aaa");
unigramProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(2, unigramProperty.mShortcutTargets.size());
for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
@ -967,7 +965,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
binaryDictionary.flushWithGC();
unigramProperty = binaryDictionary.getUnigramProperty("aaa");
unigramProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(2, unigramProperty.mShortcutTargets.size());
for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
@ -1036,7 +1034,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
for (final String word : words) {
final UnigramProperty unigramProperty = binaryDictionary.getUnigramProperty(word);
final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
assertEquals((int)unigramProbabilities.get(word), unigramProperty.mProbability);
if (!shortcutTargets.containsKey(word)) {
// The word does not have shortcut targets.