Merge "Add BinaryDictionary.getBigramProbabilityNative()."
This commit is contained in:
commit
9465819cf6
7 changed files with 99 additions and 32 deletions
|
@ -109,7 +109,7 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private static native void flushWithGCNative(long dict, String filePath);
|
private static native void flushWithGCNative(long dict, String filePath);
|
||||||
private static native void closeNative(long dict);
|
private static native void closeNative(long dict);
|
||||||
private static native int getProbabilityNative(long dict, int[] word);
|
private static native int getProbabilityNative(long dict, int[] word);
|
||||||
private static native boolean isValidBigramNative(long dict, int[] word0, int[] word1);
|
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
|
||||||
private static native int getSuggestionsNative(long dict, long proximityInfo,
|
private static native int getSuggestionsNative(long dict, long proximityInfo,
|
||||||
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
|
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
|
||||||
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
|
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
|
||||||
|
@ -122,6 +122,8 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
|
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
|
||||||
int probability);
|
int probability);
|
||||||
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
|
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
|
||||||
|
private static native int calculateProbabilityNative(long dict, int unigramProbability,
|
||||||
|
int bigramProbability);
|
||||||
|
|
||||||
// TODO: Move native dict into session
|
// TODO: Move native dict into session
|
||||||
private final void loadDictionary(final String path, final long startOffset,
|
private final void loadDictionary(final String path, final long startOffset,
|
||||||
|
@ -219,12 +221,12 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isValidWord(final String word) {
|
public boolean isValidWord(final String word) {
|
||||||
return getFrequency(word) >= 0;
|
return getFrequency(word) != NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getFrequency(final String word) {
|
public int getFrequency(final String word) {
|
||||||
if (word == null) return -1;
|
if (word == null) return NOT_A_PROBABILITY;
|
||||||
int[] codePoints = StringUtils.toCodePointArray(word);
|
int[] codePoints = StringUtils.toCodePointArray(word);
|
||||||
return getProbabilityNative(mNativeDict, codePoints);
|
return getProbabilityNative(mNativeDict, codePoints);
|
||||||
}
|
}
|
||||||
|
@ -232,10 +234,14 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
||||||
// calls when checking for changes in an entire dictionary.
|
// calls when checking for changes in an entire dictionary.
|
||||||
public boolean isValidBigram(final String word0, final String word1) {
|
public boolean isValidBigram(final String word0, final String word1) {
|
||||||
if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return false;
|
return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getBigramProbability(final String word0, final String word1) {
|
||||||
|
if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
|
||||||
final int[] codePoints0 = StringUtils.toCodePointArray(word0);
|
final int[] codePoints0 = StringUtils.toCodePointArray(word0);
|
||||||
final int[] codePoints1 = StringUtils.toCodePointArray(word1);
|
final int[] codePoints1 = StringUtils.toCodePointArray(word1);
|
||||||
return isValidBigramNative(mNativeDict, codePoints0, codePoints1);
|
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a unigram entry to binary dictionary in native code.
|
// Add a unigram entry to binary dictionary in native code.
|
||||||
|
@ -285,6 +291,12 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
return needsToRunGCNative(mNativeDict);
|
return needsToRunGCNative(mNativeDict);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Asks the native dictionary to compute a probability from the given unigram and bigram
 * probabilities.
 *
 * @param unigramProbability unigram probability, passed to native code unchanged
 * @param bigramProbability bigram probability, passed to native code unchanged
 * @return the value returned by {@code calculateProbabilityNative}, or
 *         {@code NOT_A_PROBABILITY} when {@code isValidDictionary()} returns false
 */
@UsedForTesting
|
||||||
|
public int calculateProbability(final int unigramProbability, final int bigramProbability) {
|
||||||
|
// Skip the native call entirely when the dictionary is not valid.
if (!isValidDictionary()) return NOT_A_PROBABILITY;
|
||||||
|
return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
|
public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
|
||||||
// TODO: actually use the confidence rather than use this completely broken heuristic
|
// TODO: actually use the confidence rather than use this completely broken heuristic
|
||||||
|
|
|
@ -188,8 +188,8 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
|
||||||
return dictionary->getProbability(codePoints, wordLength);
|
return dictionary->getProbability(codePoints, wordLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
|
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
|
||||||
jintArray word0, jintArray word1) {
|
jlong dict, jintArray word0, jintArray word1) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) return JNI_FALSE;
|
if (!dictionary) return JNI_FALSE;
|
||||||
const jsize word0Length = env->GetArrayLength(word0);
|
const jsize word0Length = env->GetArrayLength(word0);
|
||||||
|
@ -198,7 +198,8 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass claz
|
||||||
int word1CodePoints[word1Length];
|
int word1CodePoints[word1Length];
|
||||||
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
|
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
|
||||||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||||
return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
|
return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
|
||||||
|
word1Length);
|
||||||
}
|
}
|
||||||
|
|
||||||
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
|
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
|
||||||
|
@ -269,6 +270,16 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
|
||||||
word1Length);
|
word1Length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JNI entry point for the Java method calculateProbabilityNative(long, int, int).
// Reinterprets dict as a Dictionary pointer and forwards both probabilities to the
// dictionary structure policy's getProbability(). Returns NOT_A_PROBABILITY when the
// pointer is null. env and clazz are not used.
// NOTE(review): declared as returning int, while the neighbouring JNI entry points
// return jint; consider jint for consistency with them.
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
|
||||||
|
jlong dict, jint unigramProbability, jint bigramProbability) {
|
||||||
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
|
// A zero handle means there is no native dictionary to query.
if (!dictionary) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
|
||||||
|
bigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
static const JNINativeMethod sMethods[] = {
|
static const JNINativeMethod sMethods[] = {
|
||||||
{
|
{
|
||||||
const_cast<char *>("openNative"),
|
const_cast<char *>("openNative"),
|
||||||
|
@ -306,9 +317,9 @@ static const JNINativeMethod sMethods[] = {
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
const_cast<char *>("isValidBigramNative"),
|
const_cast<char *>("getBigramProbabilityNative"),
|
||||||
const_cast<char *>("(J[I[I)Z"),
|
const_cast<char *>("(J[I[I)I"),
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
const_cast<char *>("calcNormalizedScoreNative"),
|
const_cast<char *>("calcNormalizedScoreNative"),
|
||||||
|
@ -334,6 +345,11 @@ static const JNINativeMethod sMethods[] = {
|
||||||
const_cast<char *>("removeBigramWordsNative"),
|
const_cast<char *>("removeBigramWordsNative"),
|
||||||
const_cast<char *>("(J[I[I)V"),
|
const_cast<char *>("(J[I[I)V"),
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
|
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
const_cast<char *>("calculateProbabilityNative"),
|
||||||
|
const_cast<char *>("(JII)I"),
|
||||||
|
reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -150,24 +150,26 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
||||||
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
|
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
|
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
||||||
int length1) const {
|
int length1) const {
|
||||||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||||
if (NOT_A_DICT_POS == pos) return false;
|
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
||||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
|
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_DICT_POS == nextWordPos) return false;
|
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||||
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(
|
BinaryDictionaryBigramsIterator bigramsIt(
|
||||||
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
if (bigramsIt.getBigramPos() == nextWordPos) {
|
if (bigramsIt.getBigramPos() == nextWordPos) {
|
||||||
return true;
|
return mDictionaryStructurePolicy->getProbability(
|
||||||
|
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
|
||||||
|
bigramsIt.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Move functions related to bigram to here
|
// TODO: Move functions related to bigram to here
|
||||||
|
|
|
@ -29,7 +29,7 @@ class BigramDictionary {
|
||||||
|
|
||||||
int getPredictions(const int *word, int length, int *outBigramCodePoints,
|
int getPredictions(const int *word, int length, int *outBigramCodePoints,
|
||||||
int *outBigramProbability, int *outputTypes) const;
|
int *outBigramProbability, int *outputTypes) const;
|
||||||
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
|
int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
|
||||||
~BigramDictionary();
|
~BigramDictionary();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -93,8 +93,9 @@ int Dictionary::getProbability(const int *word, int length) const {
|
||||||
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
|
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
|
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
||||||
return mBigramDictionary->isValidBigram(word0, length0, word1, length1);
|
int length1) const {
|
||||||
|
return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
|
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
|
||||||
|
|
|
@ -67,7 +67,7 @@ class Dictionary {
|
||||||
|
|
||||||
int getProbability(const int *word, int length) const;
|
int getProbability(const int *word, int length) const;
|
||||||
|
|
||||||
bool isValidBigram(const int *word0, int length0, const int *word1, int length1) const;
|
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
||||||
|
|
||||||
void addUnigramWord(const int *const word, const int length, const int probability);
|
void addUnigramWord(const int *const word, const int length, const int probability);
|
||||||
|
|
||||||
|
|
|
@ -151,7 +151,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||||
for (int i = 0; i < wordCount; ++i) {
|
for (int i = 0; i < wordCount; ++i) {
|
||||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
probabilityMap.put(word, random.nextInt() & 0xFF);
|
probabilityMap.put(word, random.nextInt(0xFF));
|
||||||
}
|
}
|
||||||
for (String word : probabilityMap.keySet()) {
|
for (String word : probabilityMap.keySet()) {
|
||||||
binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
|
binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
|
||||||
|
@ -163,8 +163,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddBigramWords() {
|
public void testAddBigramWords() {
|
||||||
// TODO: Add a test to check the frequency of the bigram score which uses current value
|
|
||||||
// calculated in the native code
|
|
||||||
File dictFile = null;
|
File dictFile = null;
|
||||||
try {
|
try {
|
||||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
@ -179,6 +177,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
|
|
||||||
final int unigramProbability = 100;
|
final int unigramProbability = 100;
|
||||||
final int bigramProbability = 10;
|
final int bigramProbability = 10;
|
||||||
|
final int updatedBigramProbability = 15;
|
||||||
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||||
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||||
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||||
|
@ -187,21 +186,49 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||||
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
|
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
||||||
|
bigramProbability);
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
||||||
|
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability);
|
||||||
|
final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
|
||||||
|
updatedBigramProbability);
|
||||||
|
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||||
|
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
|
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
|
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
|
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
|
||||||
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
|
binaryDictionary.getBigramProbability("bcc", "aaa"));
|
||||||
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
|
binaryDictionary.getBigramProbability("bcc", "bbc"));
|
||||||
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
|
binaryDictionary.getBigramProbability("aaa", "aaa"));
|
||||||
|
|
||||||
|
// Testing bigram link.
|
||||||
|
binaryDictionary.addUnigramWord("abcde", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("fghij", unigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("fgh", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("abc", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("f", unigramProbability);
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
|
||||||
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
|
binaryDictionary.getBigramProbability("abcde", "fgh"));
|
||||||
|
binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability);
|
||||||
|
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
|
||||||
|
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomlyAddBigramWords() {
|
public void testRandomlyAddBigramWords() {
|
||||||
// TODO: Add a test to check the frequency of the bigram score which uses current value
|
|
||||||
// calculated in the native code
|
|
||||||
final int wordCount = 100;
|
final int wordCount = 100;
|
||||||
final int bigramCount = 1000;
|
final int bigramCount = 1000;
|
||||||
final int codePointSetSize = 50;
|
final int codePointSetSize = 50;
|
||||||
|
@ -222,29 +249,38 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
// Test a word that isn't contained within the dictionary.
|
// Test a word that isn't contained within the dictionary.
|
||||||
final Random random = new Random(seed);
|
final Random random = new Random(seed);
|
||||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||||
final int unigramProbability = 100;
|
final int[] unigramProbabilities = new int[wordCount];
|
||||||
final int bigramProbability = 10;
|
|
||||||
for (int i = 0; i < wordCount; ++i) {
|
for (int i = 0; i < wordCount; ++i) {
|
||||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
words.add(word);
|
words.add(word);
|
||||||
|
final int unigramProbability = random.nextInt(0xFF);
|
||||||
|
unigramProbabilities[i] = unigramProbability;
|
||||||
binaryDictionary.addUnigramWord(word, unigramProbability);
|
binaryDictionary.addUnigramWord(word, unigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean[][] bigramRelations = new boolean[wordCount][wordCount];
|
final int[][] probabilities = new int[wordCount][wordCount];
|
||||||
|
|
||||||
|
for (int i = 0; i < wordCount; ++i) {
|
||||||
|
for (int j = 0; j < wordCount; ++j) {
|
||||||
|
probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < bigramCount; i++) {
|
for (int i = 0; i < bigramCount; i++) {
|
||||||
final int word0Index = random.nextInt(wordCount);
|
final int word0Index = random.nextInt(wordCount);
|
||||||
final int word1Index = random.nextInt(wordCount);
|
final int word1Index = random.nextInt(wordCount);
|
||||||
final String word0 = words.get(word0Index);
|
final String word0 = words.get(word0Index);
|
||||||
final String word1 = words.get(word1Index);
|
final String word1 = words.get(word1Index);
|
||||||
|
final int bigramProbability = random.nextInt(0xF);
|
||||||
bigramRelations[word0Index][word1Index] = true;
|
probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
|
||||||
|
unigramProbabilities[word1Index], bigramProbability);
|
||||||
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
|
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < words.size(); i++) {
|
for (int i = 0; i < words.size(); i++) {
|
||||||
for (int j = 0; j < words.size(); j++) {
|
for (int j = 0; j < words.size(); j++) {
|
||||||
assertEquals(bigramRelations[i][j],
|
assertEquals(probabilities[i][j],
|
||||||
binaryDictionary.isValidBigram(words.get(i), words.get(j)));
|
binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue