Merge "Quit use bigram probability diff for ver4 dict."

This commit is contained in:
Keisuke Kuroyanagi 2014-05-15 07:07:24 +00:00 committed by Android (Google) Code Review
commit e810a266fd
6 changed files with 129 additions and 122 deletions

View file

@ -214,8 +214,6 @@ public final class BinaryDictionary extends Dictionary {
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
private static native int addMultipleDictionaryEntriesNative(long dict, private static native int addMultipleDictionaryEntriesNative(long dict,
LanguageModelParam[] languageModelParams, int startIndex); LanguageModelParam[] languageModelParams, int startIndex);
private static native int calculateProbabilityNative(long dict, int unigramProbability,
int bigramProbability);
private static native String getPropertyNative(long dict, String query); private static native String getPropertyNative(long dict, String query);
private static native boolean isCorruptedNative(long dict); private static native boolean isCorruptedNative(long dict);
private static native boolean migrateNative(long dict, String dictFilePath, private static native boolean migrateNative(long dict, String dictFilePath,
@ -551,12 +549,6 @@ public final class BinaryDictionary extends Dictionary {
return true; return true;
} }
@UsedForTesting
public int calculateProbability(final int unigramProbability, final int bigramProbability) {
if (!isValidDictionary()) return NOT_A_PROBABILITY;
return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
}
@UsedForTesting @UsedForTesting
public String getPropertyForTest(final String query) { public String getPropertyForTest(final String query) {
if (!isValidDictionary()) return ""; if (!isValidDictionary()) return "";

View file

@ -467,16 +467,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
return languageModelParamCount; return languageModelParamCount;
} }
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
jlong dict, jint unigramProbability, jint bigramProbability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return NOT_A_PROBABILITY;
}
return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
bigramProbability);
}
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict, static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
jstring query) { jstring query) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@ -669,11 +659,6 @@ static const JNINativeMethod sMethods[] = {
"(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"), "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries) reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
}, },
{
const_cast<char *>("calculateProbabilityNative"),
const_cast<char *>("(JII)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
},
{ {
const_cast<char *>("getPropertyNative"), const_cast<char *>("getPropertyNative"),
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"), const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),

View file

@ -38,8 +38,6 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int level = 0; int level = 0;
int count = 0; int count = 0;
if (mHasHistoricalInfo) { if (mHasHistoricalInfo) {
probability = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
timestamp = bigramListBuffer->readUintAndAdvancePosition( timestamp = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos); Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
level = bigramListBuffer->readUintAndAdvancePosition( level = bigramListBuffer->readUintAndAdvancePosition(
@ -47,7 +45,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
count = bigramListBuffer->readUintAndAdvancePosition( count = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos); Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
} else { } else {
probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; probability = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
} }
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
@ -65,21 +64,13 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
bool BigramDictContent::writeBigramEntryAndAdvancePosition( bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
const int bigramFlags = createAndGetBigramFlags( const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false; return false;
} }
if (mHasHistoricalInfo) { if (mHasHistoricalInfo) {
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
bigramEntryToWrite->getProbability());
return false;
}
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo(); const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
@ -99,6 +90,13 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
historicalInfo->getCount()); historicalInfo->getCount());
return false; return false;
} }
} else {
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
bigramEntryToWrite->getProbability());
return false;
}
} }
const int targetTerminalIdToWrite = const int targetTerminalIdToWrite =
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ? (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?

View file

@ -95,9 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
private: private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent); DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
int createAndGetBigramFlags(const int probability, const bool hasNext) const { int createAndGetBigramFlags(const bool hasNext) const {
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
| (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
} }
bool runGCBigramList(const int bigramListPos, bool runGCBigramList(const int bigramListPos,

View file

@ -115,9 +115,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
} else if (bigramProbability == NOT_A_PROBABILITY) { } else if (bigramProbability == NOT_A_PROBABILITY) {
return ProbabilityUtils::backoff(unigramProbability); return ProbabilityUtils::backoff(unigramProbability);
} else { } else {
// bigramProbability is a bigram probability delta. return bigramProbability;
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
bigramProbability);
} }
} }
} }
@ -398,7 +396,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
const int probability = bigramEntry.hasHistoricalInfo() ? const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability( ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) : bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
getProbability(word1Probability, bigramEntry.getProbability()); bigramEntry.getProbability();
bigrams.emplace_back(&word1, probability, bigrams.emplace_back(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount()); historicalInfo->getCount());

View file

@ -46,6 +46,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
private static final int[] DICT_FORMAT_VERSIONS = private static final int[] DICT_FORMAT_VERSIONS =
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
private static boolean canCheckBigramProbability(final int formatVersion) {
return formatVersion >= FormatSpec.VERSION4_DEV;
}
private File createEmptyDictionaryAndGetFile(final String dictId, private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException { final int formatVersion) throws IOException {
if (formatVersion == FormatSpec.VERSION4 if (formatVersion == FormatSpec.VERSION4
@ -298,8 +302,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100; final int unigramProbability = 100;
final int bigramProbability = 10; final int bigramProbability = 150;
final int updatedBigramProbability = 15; final int updatedBigramProbability = 200;
addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability);
@ -308,25 +312,26 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
final int probability = binaryDictionary.calculateProbability(unigramProbability, assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
bigramProbability); assertTrue(binaryDictionary.isValidBigram("aaa", "bcc"));
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); assertTrue(binaryDictionary.isValidBigram("abb", "aaa"));
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); assertTrue(binaryDictionary.isValidBigram("abb", "bcc"));
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); if (canCheckBigramProbability(formatVersion)) {
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "bcc"));
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "aaa"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "bcc"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); }
addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, if (canCheckBigramProbability(formatVersion)) {
updatedBigramProbability); assertEquals(updatedBigramProbability,
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); binaryDictionary.getBigramProbability("aaa", "abb"));
}
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); assertFalse(binaryDictionary.isValidBigram("bcc", "aaa"));
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); assertFalse(binaryDictionary.isValidBigram("bcc", "bbc"));
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); assertFalse(binaryDictionary.isValidBigram("aaa", "aaa"));
assertEquals(Dictionary.NOT_A_PROBABILITY, assertEquals(Dictionary.NOT_A_PROBABILITY,
binaryDictionary.getBigramProbability("bcc", "aaa")); binaryDictionary.getBigramProbability("bcc", "aaa"));
assertEquals(Dictionary.NOT_A_PROBABILITY, assertEquals(Dictionary.NOT_A_PROBABILITY,
@ -341,11 +346,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "fgh", unigramProbability); addUnigramWord(binaryDictionary, "fgh", unigramProbability);
addUnigramWord(binaryDictionary, "abc", unigramProbability); addUnigramWord(binaryDictionary, "abc", unigramProbability);
addUnigramWord(binaryDictionary, "f", unigramProbability); addUnigramWord(binaryDictionary, "f", unigramProbability);
assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability,
binaryDictionary.getBigramProbability("abcde", "fghij"));
}
assertEquals(Dictionary.NOT_A_PROBABILITY, assertEquals(Dictionary.NOT_A_PROBABILITY,
binaryDictionary.getBigramProbability("abcde", "fgh")); binaryDictionary.getBigramProbability("abcde", "fgh"));
addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); if (canCheckBigramProbability(formatVersion)) {
assertEquals(updatedBigramProbability,
binaryDictionary.getBigramProbability("abcde", "fghij"));
}
dictFile.delete(); dictFile.delete();
} }
@ -396,18 +408,21 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} }
final Pair<String, String> bigram = new Pair<String, String>(word0, word1); final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
bigramWords.add(bigram); bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF); final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability =
unigramProbability + random.nextInt(0xFF - unigramProbability);
bigramProbabilities.put(bigram, bigramProbability); bigramProbabilities.put(bigram, bigramProbability);
addBigramWords(binaryDictionary, word0, word1, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability);
} }
for (final Pair<String, String> bigram : bigramWords) { for (final Pair<String, String> bigram : bigramWords) {
final int unigramProbability = unigramProbabilities.get(bigram.second);
final int bigramProbability = bigramProbabilities.get(bigram); final int bigramProbability = bigramProbabilities.get(bigram);
final int probability = binaryDictionary.calculateProbability(unigramProbability, assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
bigramProbability); binaryDictionary.isValidBigram(bigram.first, bigram.second));
assertEquals(probability, if (canCheckBigramProbability(formatVersion)) {
binaryDictionary.getBigramProbability(bigram.first, bigram.second)); assertEquals(bigramProbability,
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
} }
dictFile.delete(); dictFile.delete();
@ -430,7 +445,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100; final int unigramProbability = 100;
final int bigramProbability = 10; final int bigramProbability = 150;
addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability);
@ -439,23 +454,23 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); assertTrue(binaryDictionary.isValidBigram("aaa", "bcc"));
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); assertTrue(binaryDictionary.isValidBigram("abb", "aaa"));
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); assertTrue(binaryDictionary.isValidBigram("abb", "bcc"));
binaryDictionary.removeBigramWords("aaa", "abb"); binaryDictionary.removeBigramWords("aaa", "abb");
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));
addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
binaryDictionary.removeBigramWords("aaa", "bcc"); binaryDictionary.removeBigramWords("aaa", "bcc");
assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); assertFalse(binaryDictionary.isValidBigram("aaa", "bcc"));
binaryDictionary.removeBigramWords("abb", "aaa"); binaryDictionary.removeBigramWords("abb", "aaa");
assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); assertFalse(binaryDictionary.isValidBigram("abb", "aaa"));
binaryDictionary.removeBigramWords("abb", "bcc"); binaryDictionary.removeBigramWords("abb", "bcc");
assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); assertFalse(binaryDictionary.isValidBigram("abb", "bcc"));
binaryDictionary.removeBigramWords("aaa", "abb"); binaryDictionary.removeBigramWords("aaa", "abb");
// Test remove non-existing bigram operation. // Test remove non-existing bigram operation.
@ -537,7 +552,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100; final int unigramProbability = 100;
final int bigramProbability = 10; final int bigramProbability = 150;
addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability);
@ -551,18 +566,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int probability = binaryDictionary.calculateProbability(unigramProbability,
bigramProbability);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); if (canCheckBigramProbability(formatVersion)) {
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "bcc"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "aaa"));
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "bcc"));
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); }
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); assertFalse(binaryDictionary.isValidBigram("bcc", "aaa"));
assertFalse(binaryDictionary.isValidBigram("bcc", "bbc"));
assertFalse(binaryDictionary.isValidBigram("aaa", "aaa"));
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
binaryDictionary.close(); binaryDictionary.close();
@ -617,7 +632,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} }
final Pair<String, String> bigram = new Pair<String, String>(word0, word1); final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
bigramWords.add(bigram); bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF); final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability =
unigramProbability + random.nextInt(0xFF - unigramProbability);
bigramProbabilities.put(bigram, bigramProbability); bigramProbabilities.put(bigram, bigramProbability);
addBigramWords(binaryDictionary, word0, word1, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability);
} }
@ -628,13 +645,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
for (final Pair<String, String> bigram : bigramWords) { for (final Pair<String, String> bigram : bigramWords) {
final int unigramProbability = unigramProbabilities.get(bigram.second);
final int bigramProbability = bigramProbabilities.get(bigram); final int bigramProbability = bigramProbabilities.get(bigram);
final int probability = binaryDictionary.calculateProbability(unigramProbability, assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
bigramProbability); binaryDictionary.isValidBigram(bigram.first, bigram.second));
assertEquals(probability, if (canCheckBigramProbability(formatVersion)) {
binaryDictionary.getBigramProbability(bigram.first, bigram.second)); assertEquals(bigramProbability,
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
} }
dictFile.delete(); dictFile.delete();
@ -709,7 +728,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
if (TextUtils.equals(word0, word1)) { if (TextUtils.equals(word0, word1)) {
continue; continue;
} }
final int bigramProbability = random.nextInt(0xF); final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability =
unigramProbability + random.nextInt(0xFF - unigramProbability);
final Pair<String, String> bigram = new Pair<String, String>(word0, word1); final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
bigramWords.add(bigram); bigramWords.add(bigram);
bigramProbabilities.put(bigram, bigramProbability); bigramProbabilities.put(bigram, bigramProbability);
@ -734,17 +755,20 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// Test whether the all bigram operations are collectlly handled. // Test whether the all bigram operations are collectlly handled.
for (int i = 0; i < bigramWords.size(); i++) { for (int i = 0; i < bigramWords.size(); i++) {
final Pair<String, String> bigram = bigramWords.get(i); final Pair<String, String> bigram = bigramWords.get(i);
final int unigramProbability = unigramProbabilities.get(bigram.second);
final int probability; final int probability;
if (bigramProbabilities.containsKey(bigram)) { if (bigramProbabilities.containsKey(bigram)) {
final int bigramProbability = bigramProbabilities.get(bigram); final int bigramProbability = bigramProbabilities.get(bigram);
probability = binaryDictionary.calculateProbability(unigramProbability, probability = bigramProbability;
bigramProbability);
} else { } else {
probability = Dictionary.NOT_A_PROBABILITY; probability = Dictionary.NOT_A_PROBABILITY;
} }
assertEquals(probability,
binaryDictionary.getBigramProbability(bigram.first, bigram.second)); if (canCheckBigramProbability(formatVersion)) {
assertEquals(probability,
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
binaryDictionary.isValidBigram(bigram.first, bigram.second));
} }
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
binaryDictionary.close(); binaryDictionary.close();
@ -894,7 +918,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int i = 0; i < languageModelParams.length; i++) { for (int i = 0; i < languageModelParams.length; i++) {
final String word = CodePointUtils.generateWord(random, codePointSet); final String word = CodePointUtils.generateWord(random, codePointSet);
final int probability = random.nextInt(0xFF); final int probability = random.nextInt(0xFF);
final int bigramProbability = random.nextInt(0xF); final int bigramProbability = probability + random.nextInt(0xFF - probability);
unigramProbabilities.put(word, probability); unigramProbabilities.put(word, probability);
if (prevWord == null) { if (prevWord == null) {
languageModelParams[i] = new LanguageModelParam(word, probability, languageModelParams[i] = new LanguageModelParam(word, probability,
@ -920,11 +944,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
final String word0 = entry.getKey().first; final String word0 = entry.getKey().first;
final String word1 = entry.getKey().second; final String word1 = entry.getKey().second;
final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability = entry.getValue(); final int bigramProbability = entry.getValue();
final int probability = binaryDictionary.calculateProbability( assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
unigramProbability, bigramProbability); binaryDictionary.isValidBigram(word0, word1));
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1)); if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability,
binaryDictionary.getBigramProbability(word0, word1));
}
} }
} }
@ -994,7 +1020,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} }
final String word0 = words.get(word0Index); final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index); final String word1 = words.get(word1Index);
final int bigramProbability = random.nextInt(0xF); final int unigramProbability = wordProbabilities.get(word1);
final int bigramProbability =
unigramProbability + random.nextInt(0xFF - unigramProbability);
binaryDictionary.addBigramWords(word0, word1, bigramProbability, binaryDictionary.addBigramWords(word0, word1, bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
@ -1019,12 +1047,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < wordProperty.mBigrams.size(); j++) { for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord; final String word1 = wordProperty.mBigrams.get(j).mWord;
assertTrue(bigramWord1s.contains(word1)); assertTrue(bigramWord1s.contains(word1));
final int bigramProbabilityDelta = bigramProbabilities.get( if (canCheckBigramProbability(formatVersion)) {
new Pair<String, String>(word0, word1)); final int bigramProbability = bigramProbabilities.get(
final int unigramProbability = wordProbabilities.get(word1); new Pair<String, String>(word0, word1));
final int bigramProbablity = binaryDictionary.calculateProbability( assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
unigramProbability, bigramProbabilityDelta); }
assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
} }
} }
} }
@ -1082,7 +1109,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} }
final String word0 = words.get(word0Index); final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index); final String word1 = words.get(word1Index);
final int bigramProbability = random.nextInt(0xF); final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
final int bigramProbability =
unigramProbability + random.nextInt(0xFF - unigramProbability);
binaryDictionary.addBigramWords(word0, word1, bigramProbability, binaryDictionary.addBigramWords(word0, word1, bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
@ -1113,12 +1142,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < wordProperty.mBigrams.size(); j++) { for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord; final String word1 = wordProperty.mBigrams.get(j).mWord;
assertTrue(bigramWord1s.contains(word1)); assertTrue(bigramWord1s.contains(word1));
final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
final Pair<String, String> bigram = new Pair<String, String>(word0, word1); final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram); if (canCheckBigramProbability(formatVersion)) {
final int bigramProbablity = binaryDictionary.calculateProbability( final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
unigramProbability, bigramProbabilityDelta); assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity); }
bigramSet.remove(bigram); bigramSet.remove(bigram);
} }
token = result.mNextToken; token = result.mNextToken;
@ -1286,7 +1314,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = 100; final int unigramProbability = 100;
addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "bbb", unigramProbability); addUnigramWord(binaryDictionary, "bbb", unigramProbability);
final int bigramProbability = 10; final int bigramProbability = 150;
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
final int shortcutProbability = 10; final int shortcutProbability = 10;
binaryDictionary.addUnigramWord("ccc", unigramProbability, "xxx", shortcutProbability, binaryDictionary.addUnigramWord("ccc", unigramProbability, "xxx", shortcutProbability,
@ -1303,7 +1331,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
// TODO: Add tests for bigram frequency when the implementation gets ready. if (canCheckBigramProbability(toFormatVersion)) {
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "bbb"));
}
assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
WordProperty wordProperty = binaryDictionary.getWordProperty("ccc"); WordProperty wordProperty = binaryDictionary.getWordProperty("ccc");
assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals(1, wordProperty.mShortcutTargets.size());
@ -1362,7 +1392,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} }
final String word0 = words.get(word0Index); final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index); final String word1 = words.get(word1Index);
final int bigramProbability = random.nextInt(0xF); final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability =
random.nextInt(0xFF - unigramProbability) + unigramProbability;
binaryDictionary.addBigramWords(word0, word1, bigramProbability, binaryDictionary.addBigramWords(word0, word1, bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
@ -1381,7 +1413,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
for (final Pair<String, String> bigram : bigrams) { for (final Pair<String, String> bigram : bigrams) {
// TODO: Add tests for bigram frequency when the implementation gets ready. if (canCheckBigramProbability(toFormatVersion)) {
assertEquals((int)bigramProbabilities.get(bigram),
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
assertTrue(binaryDictionary.isValidBigram(bigram.first, bigram.second)); assertTrue(binaryDictionary.isValidBigram(bigram.first, bigram.second));
} }
assertEquals(bigramProbabilities.size(), Integer.parseInt( assertEquals(bigramProbabilities.size(), Integer.parseInt(