am 9221772a: Get bigram information via getWordProperty().
* commit '9221772ab7f112f6ef9136a69d0502befbdc544e': Get bigram information via getWordProperty().main
commit
2b0453b7bf
|
@ -34,7 +34,27 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
||||||
jclass arrayListClass = env->FindClass("java/util/ArrayList");
|
jclass arrayListClass = env->FindClass("java/util/ArrayList");
|
||||||
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
|
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
|
||||||
|
|
||||||
// TODO: Output bigrams.
|
// Output bigrams.
|
||||||
|
const int bigramCount = mBigrams.size();
|
||||||
|
for (int i = 0; i < bigramCount; ++i) {
|
||||||
|
const BigramProperty *const bigramProperty = &mBigrams[i];
|
||||||
|
const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints();
|
||||||
|
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
|
||||||
|
env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
|
||||||
|
word1CodePoints->size(), &word1CodePoints->at(0));
|
||||||
|
env->CallVoidMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
|
||||||
|
env->DeleteLocalRef(bigramWord1CodePointArray);
|
||||||
|
|
||||||
|
int bigramProbabilityInfo[] = {bigramProperty->getProbability(),
|
||||||
|
bigramProperty->getTimestamp(), bigramProperty->getLevel(),
|
||||||
|
bigramProperty->getCount()};
|
||||||
|
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
|
||||||
|
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
|
||||||
|
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
|
||||||
|
env->CallVoidMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
|
||||||
|
env->DeleteLocalRef(bigramProbabilityInfoArray);
|
||||||
|
}
|
||||||
|
|
||||||
// Output shortcuts.
|
// Output shortcuts.
|
||||||
const int shortcutTargetCount = mShortcuts.size();
|
const int shortcutTargetCount = mShortcuts.size();
|
||||||
for (int i = 0; i < shortcutTargetCount; ++i) {
|
for (int i = 0; i < shortcutTargetCount; ++i) {
|
||||||
|
|
|
@ -35,6 +35,26 @@ class WordProperty {
|
||||||
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
|
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
|
||||||
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
||||||
|
|
||||||
|
const std::vector<int> *getTargetCodePoints() const {
|
||||||
|
return &mTargetCodePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getProbability() const {
|
||||||
|
return mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getTimestamp() const {
|
||||||
|
return mTimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getLevel() const {
|
||||||
|
return mLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getCount() const {
|
||||||
|
return mCount;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<int> mTargetCodePoints;
|
std::vector<int> mTargetCodePoints;
|
||||||
int mProbability;
|
int mProbability;
|
||||||
|
|
|
@ -878,7 +878,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
private void testGetWordProperties(final int formatVersion) {
|
private void testGetWordProperties(final int formatVersion) {
|
||||||
final long seed = System.currentTimeMillis();
|
final long seed = System.currentTimeMillis();
|
||||||
final Random random = new Random(seed);
|
final Random random = new Random(seed);
|
||||||
final int ITERATION_COUNT = 1000;
|
final int UNIGRAM_COUNT = 1000;
|
||||||
|
final int BIGRAM_COUNT = 1000;
|
||||||
final int codePointSetSize = 20;
|
final int codePointSetSize = 20;
|
||||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||||
|
|
||||||
|
@ -895,7 +896,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
|
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
|
||||||
assertFalse(invalidWordProperty.isValid());
|
assertFalse(invalidWordProperty.isValid());
|
||||||
|
|
||||||
for (int i = 0; i < ITERATION_COUNT; i++) {
|
final ArrayList<String> words = new ArrayList<String>();
|
||||||
|
final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>();
|
||||||
|
final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
|
||||||
|
final HashMap<Pair<String, String>, Integer> bigramProbabilities =
|
||||||
|
new HashMap<Pair<String, String>, Integer>();
|
||||||
|
|
||||||
|
for (int i = 0; i < UNIGRAM_COUNT; i++) {
|
||||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
final int unigramProbability = random.nextInt(0xFF);
|
final int unigramProbability = random.nextInt(0xFF);
|
||||||
final boolean isNotAWord = random.nextBoolean();
|
final boolean isNotAWord = random.nextBoolean();
|
||||||
|
@ -904,15 +911,63 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
binaryDictionary.addUnigramWord(word, unigramProbability,
|
binaryDictionary.addUnigramWord(word, unigramProbability,
|
||||||
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
|
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
|
||||||
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
|
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
||||||
assertEquals(word, wordProperty.mCodePoints);
|
binaryDictionary.flushWithGC();
|
||||||
assertTrue(wordProperty.isValid());
|
}
|
||||||
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
|
words.add(word);
|
||||||
assertEquals(isBlacklisted, wordProperty.mIsBlacklisted);
|
wordProbabilities.put(word, unigramProbability);
|
||||||
assertEquals(false, wordProperty.mHasBigrams);
|
final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
|
||||||
assertEquals(false, wordProperty.mHasShortcuts);
|
assertEquals(word, unigramProperty.mCodePoints);
|
||||||
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
|
assertTrue(unigramProperty.isValid());
|
||||||
assertTrue(wordProperty.mShortcutTargets.isEmpty());
|
assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
|
||||||
|
assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
|
||||||
|
assertEquals(false, unigramProperty.mHasBigrams);
|
||||||
|
assertEquals(false, unigramProperty.mHasShortcuts);
|
||||||
|
assertEquals(unigramProbability, unigramProperty.mProbabilityInfo.mProbability);
|
||||||
|
assertTrue(unigramProperty.mShortcutTargets.isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < BIGRAM_COUNT; i++) {
|
||||||
|
final int word0Index = random.nextInt(wordProbabilities.size());
|
||||||
|
final int word1Index = random.nextInt(wordProbabilities.size());
|
||||||
|
if (word0Index == word1Index) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final String word0 = words.get(word0Index);
|
||||||
|
final String word1 = words.get(word1Index);
|
||||||
|
final int bigramProbability = random.nextInt(0xF);
|
||||||
|
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
||||||
|
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
|
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
||||||
|
binaryDictionary.flushWithGC();
|
||||||
|
}
|
||||||
|
if (!bigrams.containsKey(word0)) {
|
||||||
|
final HashSet<String> bigramWord1s = new HashSet<String>();
|
||||||
|
bigrams.put(word0, bigramWord1s);
|
||||||
|
}
|
||||||
|
bigrams.get(word0).add(word1);
|
||||||
|
bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < words.size(); i++) {
|
||||||
|
final String word0 = words.get(i);
|
||||||
|
if (!bigrams.containsKey(word0)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
||||||
|
final WordProperty unigramProperty = binaryDictionary.getWordProperty(word0);
|
||||||
|
assertEquals(bigramWord1s.size(), unigramProperty.mBigramTargets.size());
|
||||||
|
assertEquals(unigramProperty.mBigramTargets.size(),
|
||||||
|
unigramProperty.mBigramProbabilityInfo.size());
|
||||||
|
for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
|
||||||
|
final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
|
||||||
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
|
final int probability = unigramProperty.mBigramTargets.get(j).mFrequency;
|
||||||
|
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
|
||||||
|
probability);
|
||||||
|
assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
|
||||||
|
probability);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue