From 9221772ab7f112f6ef9136a69d0502befbdc544e Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 3 Feb 2014 11:47:26 +0900 Subject: [PATCH] Get bigram information via getWordProperty(). Bug: 12810574 Change-Id: I2750a5659ccbc3e31307c28e35dd9a1dbdffbeee --- .../suggest/core/dictionary/word_property.cpp | 22 +++++- .../suggest/core/dictionary/word_property.h | 20 +++++ .../latin/BinaryDictionaryTests.java | 77 ++++++++++++++++--- 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp index d8c330bbd..288e6b05e 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/word_property.cpp @@ -34,7 +34,27 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jclass arrayListClass = env->FindClass("java/util/ArrayList"); jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); - // TODO: Output bigrams. + // Output bigrams. + const int bigramCount = mBigrams.size(); + for (int i = 0; i < bigramCount; ++i) { + const BigramProperty *const bigramProperty = &mBigrams[i]; + const std::vector *const word1CodePoints = bigramProperty->getTargetCodePoints(); + jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); + env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */, + word1CodePoints->size(), &word1CodePoints->at(0)); + env->CallVoidMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); + env->DeleteLocalRef(bigramWord1CodePointArray); + + int bigramProbabilityInfo[] = {bigramProperty->getProbability(), + bigramProperty->getTimestamp(), bigramProperty->getLevel(), + bigramProperty->getCount()}; + jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); + env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, + NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); + env->CallVoidMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray); + env->DeleteLocalRef(bigramProbabilityInfoArray); + } + // Output shortcuts. const int shortcutTargetCount = mShortcuts.size(); for (int i = 0; i < shortcutTargetCount; ++i) { diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h index cc06b1baa..40b1a91a4 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.h +++ b/native/jni/src/suggest/core/dictionary/word_property.h @@ -35,6 +35,26 @@ class WordProperty { : mTargetCodePoints(*targetCodePoints), mProbability(probability), mTimestamp(timestamp), mLevel(level), mCount(count) {} + const std::vector *getTargetCodePoints() const { + return &mTargetCodePoints; + } + + int getProbability() const { + return mProbability; + } + + int getTimestamp() const { + return mTimestamp; + } + + int getLevel() const { + return mLevel; + } + + int getCount() const { + return mCount; + } + private: std::vector mTargetCodePoints; int mProbability; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 5294bb006..e39b46f94 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -878,7 +878,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { private void testGetWordProperties(final int formatVersion) { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final int ITERATION_COUNT = 1000; + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -895,7 +896,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); assertFalse(invalidWordProperty.isValid()); - for (int i = 0; i < ITERATION_COUNT; i++) { + final ArrayList words = new ArrayList(); + final HashMap wordProbabilities = new HashMap(); + final HashMap> bigrams = new HashMap>(); + final HashMap, Integer> bigramProbabilities = + new HashMap, Integer>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { final String word = CodePointUtils.generateWord(random, codePointSet); final int unigramProbability = random.nextInt(0xFF); final boolean isNotAWord = random.nextBoolean(); @@ -904,15 +911,63 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramWord(word, unigramProbability, null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - final WordProperty wordProperty = binaryDictionary.getWordProperty(word); - assertEquals(word, wordProperty.mCodePoints); - assertTrue(wordProperty.isValid()); - assertEquals(isNotAWord, wordProperty.mIsNotAWord); - assertEquals(isBlacklisted, wordProperty.mIsBlacklisted); - assertEquals(false, wordProperty.mHasBigrams); - assertEquals(false, wordProperty.mHasShortcuts); - assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); - assertTrue(wordProperty.mShortcutTargets.isEmpty()); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilities.put(word, unigramProbability); + final WordProperty unigramProperty = binaryDictionary.getWordProperty(word); + assertEquals(word, unigramProperty.mCodePoints); + assertTrue(unigramProperty.isValid()); + assertEquals(isNotAWord, unigramProperty.mIsNotAWord); + assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted); + assertEquals(false, unigramProperty.mHasBigrams); + assertEquals(false, unigramProperty.mHasShortcuts); + assertEquals(unigramProbability, unigramProperty.mProbabilityInfo.mProbability); + assertTrue(unigramProperty.mShortcutTargets.isEmpty()); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilities.size()); + final int word1Index = random.nextInt(wordProbabilities.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet bigramWord1s = new HashSet(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilities.put(new Pair(word0, word1), bigramProbability); + } + + for (int i = 0; i < words.size(); i++) { + final String word0 = words.get(i); + if (!bigrams.containsKey(word0)) { + continue; + } + final HashSet bigramWord1s = bigrams.get(word0); + final WordProperty unigramProperty = binaryDictionary.getWordProperty(word0); + assertEquals(bigramWord1s.size(), unigramProperty.mBigramTargets.size()); + assertEquals(unigramProperty.mBigramTargets.size(), + unigramProperty.mBigramProbabilityInfo.size()); + for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) { + final String word1 = unigramProperty.mBigramTargets.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final int probability = unigramProperty.mBigramTargets.get(j).mFrequency; + assertEquals((int)bigramProbabilities.get(new Pair(word0, word1)), + probability); + assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability, + probability); + } } }