Merge "Quit use bigram probability diff for ver4 dict."
This commit is contained in:
commit
e810a266fd
6 changed files with 129 additions and 122 deletions
|
@ -214,8 +214,6 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
|
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
|
||||||
private static native int addMultipleDictionaryEntriesNative(long dict,
|
private static native int addMultipleDictionaryEntriesNative(long dict,
|
||||||
LanguageModelParam[] languageModelParams, int startIndex);
|
LanguageModelParam[] languageModelParams, int startIndex);
|
||||||
private static native int calculateProbabilityNative(long dict, int unigramProbability,
|
|
||||||
int bigramProbability);
|
|
||||||
private static native String getPropertyNative(long dict, String query);
|
private static native String getPropertyNative(long dict, String query);
|
||||||
private static native boolean isCorruptedNative(long dict);
|
private static native boolean isCorruptedNative(long dict);
|
||||||
private static native boolean migrateNative(long dict, String dictFilePath,
|
private static native boolean migrateNative(long dict, String dictFilePath,
|
||||||
|
@ -551,12 +549,6 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@UsedForTesting
|
|
||||||
public int calculateProbability(final int unigramProbability, final int bigramProbability) {
|
|
||||||
if (!isValidDictionary()) return NOT_A_PROBABILITY;
|
|
||||||
return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public String getPropertyForTest(final String query) {
|
public String getPropertyForTest(final String query) {
|
||||||
if (!isValidDictionary()) return "";
|
if (!isValidDictionary()) return "";
|
||||||
|
|
|
@ -467,16 +467,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
return languageModelParamCount;
|
return languageModelParamCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
|
|
||||||
jlong dict, jint unigramProbability, jint bigramProbability) {
|
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
|
||||||
if (!dictionary) {
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
|
|
||||||
bigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
|
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
jstring query) {
|
jstring query) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
|
@ -669,11 +659,6 @@ static const JNINativeMethod sMethods[] = {
|
||||||
"(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
|
"(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
|
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
|
||||||
},
|
},
|
||||||
{
|
|
||||||
const_cast<char *>("calculateProbabilityNative"),
|
|
||||||
const_cast<char *>("(JII)I"),
|
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
const_cast<char *>("getPropertyNative"),
|
const_cast<char *>("getPropertyNative"),
|
||||||
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
|
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
|
||||||
|
|
|
@ -38,8 +38,6 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||||
int level = 0;
|
int level = 0;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
probability = bigramListBuffer->readUintAndAdvancePosition(
|
|
||||||
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
|
|
||||||
timestamp = bigramListBuffer->readUintAndAdvancePosition(
|
timestamp = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
|
||||||
level = bigramListBuffer->readUintAndAdvancePosition(
|
level = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
@ -47,7 +45,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||||
count = bigramListBuffer->readUintAndAdvancePosition(
|
count = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
|
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
|
||||||
} else {
|
} else {
|
||||||
probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
|
probability = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
|
||||||
}
|
}
|
||||||
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
||||||
|
@ -65,21 +64,13 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||||
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
||||||
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
||||||
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
||||||
const int bigramFlags = createAndGetBigramFlags(
|
const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
|
||||||
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
|
|
||||||
bigramEntryToWrite->hasNext());
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
|
|
||||||
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
|
||||||
bigramEntryToWrite->getProbability());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
||||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
||||||
|
@ -99,6 +90,13 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
||||||
historicalInfo->getCount());
|
historicalInfo->getCount());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
|
||||||
|
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
||||||
|
bigramEntryToWrite->getProbability());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const int targetTerminalIdToWrite =
|
const int targetTerminalIdToWrite =
|
||||||
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
||||||
|
|
|
@ -95,9 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
||||||
|
|
||||||
int createAndGetBigramFlags(const int probability, const bool hasNext) const {
|
int createAndGetBigramFlags(const bool hasNext) const {
|
||||||
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
|
return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
|
||||||
| (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool runGCBigramList(const int bigramListPos,
|
bool runGCBigramList(const int bigramListPos,
|
||||||
|
|
|
@ -115,9 +115,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
||||||
return ProbabilityUtils::backoff(unigramProbability);
|
return ProbabilityUtils::backoff(unigramProbability);
|
||||||
} else {
|
} else {
|
||||||
// bigramProbability is a bigram probability delta.
|
return bigramProbability;
|
||||||
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
|
|
||||||
bigramProbability);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -398,7 +396,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||||
ForgettingCurveUtils::decodeProbability(
|
ForgettingCurveUtils::decodeProbability(
|
||||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
||||||
getProbability(word1Probability, bigramEntry.getProbability());
|
bigramEntry.getProbability();
|
||||||
bigrams.emplace_back(&word1, probability,
|
bigrams.emplace_back(&word1, probability,
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||||
historicalInfo->getCount());
|
historicalInfo->getCount());
|
||||||
|
|
|
@ -46,6 +46,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
private static final int[] DICT_FORMAT_VERSIONS =
|
private static final int[] DICT_FORMAT_VERSIONS =
|
||||||
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
|
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
|
||||||
|
|
||||||
|
private static boolean canCheckBigramProbability(final int formatVersion) {
|
||||||
|
return formatVersion >= FormatSpec.VERSION4_DEV;
|
||||||
|
}
|
||||||
|
|
||||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||||
final int formatVersion) throws IOException {
|
final int formatVersion) throws IOException {
|
||||||
if (formatVersion == FormatSpec.VERSION4
|
if (formatVersion == FormatSpec.VERSION4
|
||||||
|
@ -298,8 +302,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
final int unigramProbability = 100;
|
final int unigramProbability = 100;
|
||||||
final int bigramProbability = 10;
|
final int bigramProbability = 150;
|
||||||
final int updatedBigramProbability = 15;
|
final int updatedBigramProbability = 200;
|
||||||
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "abb", unigramProbability);
|
addUnigramWord(binaryDictionary, "abb", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
|
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
|
||||||
|
@ -308,25 +312,26 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
|
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
|
||||||
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
|
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
bigramProbability);
|
assertTrue(binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
assertTrue(binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
assertTrue(binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
}
|
||||||
|
|
||||||
addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
|
addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
|
||||||
final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
updatedBigramProbability);
|
assertEquals(updatedBigramProbability,
|
||||||
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||||
|
}
|
||||||
|
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
|
assertFalse(binaryDictionary.isValidBigram("bcc", "aaa"));
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
|
assertFalse(binaryDictionary.isValidBigram("bcc", "bbc"));
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
|
assertFalse(binaryDictionary.isValidBigram("aaa", "aaa"));
|
||||||
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
binaryDictionary.getBigramProbability("bcc", "aaa"));
|
binaryDictionary.getBigramProbability("bcc", "aaa"));
|
||||||
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
|
@ -341,11 +346,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
addUnigramWord(binaryDictionary, "fgh", unigramProbability);
|
addUnigramWord(binaryDictionary, "fgh", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "abc", unigramProbability);
|
addUnigramWord(binaryDictionary, "abc", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "f", unigramProbability);
|
addUnigramWord(binaryDictionary, "f", unigramProbability);
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
|
|
||||||
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
|
assertEquals(bigramProbability,
|
||||||
|
binaryDictionary.getBigramProbability("abcde", "fghij"));
|
||||||
|
}
|
||||||
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
assertEquals(Dictionary.NOT_A_PROBABILITY,
|
||||||
binaryDictionary.getBigramProbability("abcde", "fgh"));
|
binaryDictionary.getBigramProbability("abcde", "fgh"));
|
||||||
addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
|
addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
|
||||||
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
|
assertEquals(updatedBigramProbability,
|
||||||
|
binaryDictionary.getBigramProbability("abcde", "fghij"));
|
||||||
|
}
|
||||||
|
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
@ -396,18 +408,21 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
bigramWords.add(bigram);
|
bigramWords.add(bigram);
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int unigramProbability = unigramProbabilities.get(word1);
|
||||||
|
final int bigramProbability =
|
||||||
|
unigramProbability + random.nextInt(0xFF - unigramProbability);
|
||||||
bigramProbabilities.put(bigram, bigramProbability);
|
bigramProbabilities.put(bigram, bigramProbability);
|
||||||
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
|
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final Pair<String, String> bigram : bigramWords) {
|
for (final Pair<String, String> bigram : bigramWords) {
|
||||||
final int unigramProbability = unigramProbabilities.get(bigram.second);
|
|
||||||
final int bigramProbability = bigramProbabilities.get(bigram);
|
final int bigramProbability = bigramProbabilities.get(bigram);
|
||||||
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
|
||||||
bigramProbability);
|
binaryDictionary.isValidBigram(bigram.first, bigram.second));
|
||||||
assertEquals(probability,
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
assertEquals(bigramProbability,
|
||||||
|
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
|
@ -430,7 +445,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
final int unigramProbability = 100;
|
final int unigramProbability = 100;
|
||||||
final int bigramProbability = 10;
|
final int bigramProbability = 150;
|
||||||
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "abb", unigramProbability);
|
addUnigramWord(binaryDictionary, "abb", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
|
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
|
||||||
|
@ -439,23 +454,23 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
|
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
|
||||||
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
|
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
assertTrue(binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
assertTrue(binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
assertTrue(binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
binaryDictionary.removeBigramWords("aaa", "abb");
|
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
|
assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
|
addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
|
||||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
|
||||||
|
|
||||||
binaryDictionary.removeBigramWords("aaa", "bcc");
|
binaryDictionary.removeBigramWords("aaa", "bcc");
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
|
assertFalse(binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
binaryDictionary.removeBigramWords("abb", "aaa");
|
binaryDictionary.removeBigramWords("abb", "aaa");
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
|
assertFalse(binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
binaryDictionary.removeBigramWords("abb", "bcc");
|
binaryDictionary.removeBigramWords("abb", "bcc");
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
|
assertFalse(binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
binaryDictionary.removeBigramWords("aaa", "abb");
|
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
// Test remove non-existing bigram operation.
|
// Test remove non-existing bigram operation.
|
||||||
|
@ -537,7 +552,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
final int unigramProbability = 100;
|
final int unigramProbability = 100;
|
||||||
final int bigramProbability = 10;
|
final int bigramProbability = 150;
|
||||||
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "abb", unigramProbability);
|
addUnigramWord(binaryDictionary, "abb", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
|
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
|
||||||
|
@ -551,18 +566,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
|
||||||
bigramProbability);
|
|
||||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
||||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
|
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
|
||||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
|
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
|
}
|
||||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
|
assertFalse(binaryDictionary.isValidBigram("bcc", "aaa"));
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("bcc", "bbc"));
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("aaa", "aaa"));
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
|
|
||||||
|
@ -617,7 +632,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
bigramWords.add(bigram);
|
bigramWords.add(bigram);
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int unigramProbability = unigramProbabilities.get(word1);
|
||||||
|
final int bigramProbability =
|
||||||
|
unigramProbability + random.nextInt(0xFF - unigramProbability);
|
||||||
bigramProbabilities.put(bigram, bigramProbability);
|
bigramProbabilities.put(bigram, bigramProbability);
|
||||||
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
|
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
|
||||||
}
|
}
|
||||||
|
@ -628,13 +645,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
|
||||||
for (final Pair<String, String> bigram : bigramWords) {
|
for (final Pair<String, String> bigram : bigramWords) {
|
||||||
final int unigramProbability = unigramProbabilities.get(bigram.second);
|
|
||||||
final int bigramProbability = bigramProbabilities.get(bigram);
|
final int bigramProbability = bigramProbabilities.get(bigram);
|
||||||
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
|
||||||
bigramProbability);
|
binaryDictionary.isValidBigram(bigram.first, bigram.second));
|
||||||
assertEquals(probability,
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
assertEquals(bigramProbability,
|
||||||
|
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
|
@ -709,7 +728,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
if (TextUtils.equals(word0, word1)) {
|
if (TextUtils.equals(word0, word1)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int unigramProbability = unigramProbabilities.get(word1);
|
||||||
|
final int bigramProbability =
|
||||||
|
unigramProbability + random.nextInt(0xFF - unigramProbability);
|
||||||
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
bigramWords.add(bigram);
|
bigramWords.add(bigram);
|
||||||
bigramProbabilities.put(bigram, bigramProbability);
|
bigramProbabilities.put(bigram, bigramProbability);
|
||||||
|
@ -734,17 +755,20 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
// Test whether the all bigram operations are collectlly handled.
|
// Test whether the all bigram operations are collectlly handled.
|
||||||
for (int i = 0; i < bigramWords.size(); i++) {
|
for (int i = 0; i < bigramWords.size(); i++) {
|
||||||
final Pair<String, String> bigram = bigramWords.get(i);
|
final Pair<String, String> bigram = bigramWords.get(i);
|
||||||
final int unigramProbability = unigramProbabilities.get(bigram.second);
|
|
||||||
final int probability;
|
final int probability;
|
||||||
if (bigramProbabilities.containsKey(bigram)) {
|
if (bigramProbabilities.containsKey(bigram)) {
|
||||||
final int bigramProbability = bigramProbabilities.get(bigram);
|
final int bigramProbability = bigramProbabilities.get(bigram);
|
||||||
probability = binaryDictionary.calculateProbability(unigramProbability,
|
probability = bigramProbability;
|
||||||
bigramProbability);
|
|
||||||
} else {
|
} else {
|
||||||
probability = Dictionary.NOT_A_PROBABILITY;
|
probability = Dictionary.NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
assertEquals(probability,
|
|
||||||
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
|
assertEquals(probability,
|
||||||
|
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
||||||
|
}
|
||||||
|
assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
|
||||||
|
binaryDictionary.isValidBigram(bigram.first, bigram.second));
|
||||||
}
|
}
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
|
@ -894,7 +918,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (int i = 0; i < languageModelParams.length; i++) {
|
for (int i = 0; i < languageModelParams.length; i++) {
|
||||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
final int probability = random.nextInt(0xFF);
|
final int probability = random.nextInt(0xFF);
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int bigramProbability = probability + random.nextInt(0xFF - probability);
|
||||||
unigramProbabilities.put(word, probability);
|
unigramProbabilities.put(word, probability);
|
||||||
if (prevWord == null) {
|
if (prevWord == null) {
|
||||||
languageModelParams[i] = new LanguageModelParam(word, probability,
|
languageModelParams[i] = new LanguageModelParam(word, probability,
|
||||||
|
@ -920,11 +944,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
|
for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
|
||||||
final String word0 = entry.getKey().first;
|
final String word0 = entry.getKey().first;
|
||||||
final String word1 = entry.getKey().second;
|
final String word1 = entry.getKey().second;
|
||||||
final int unigramProbability = unigramProbabilities.get(word1);
|
|
||||||
final int bigramProbability = entry.getValue();
|
final int bigramProbability = entry.getValue();
|
||||||
final int probability = binaryDictionary.calculateProbability(
|
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
|
||||||
unigramProbability, bigramProbability);
|
binaryDictionary.isValidBigram(word0, word1));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
|
assertEquals(bigramProbability,
|
||||||
|
binaryDictionary.getBigramProbability(word0, word1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -994,7 +1020,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
final String word0 = words.get(word0Index);
|
final String word0 = words.get(word0Index);
|
||||||
final String word1 = words.get(word1Index);
|
final String word1 = words.get(word1Index);
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int unigramProbability = wordProbabilities.get(word1);
|
||||||
|
final int bigramProbability =
|
||||||
|
unigramProbability + random.nextInt(0xFF - unigramProbability);
|
||||||
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
||||||
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
||||||
|
@ -1019,12 +1047,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||||
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||||
assertTrue(bigramWord1s.contains(word1));
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
final int bigramProbabilityDelta = bigramProbabilities.get(
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
new Pair<String, String>(word0, word1));
|
final int bigramProbability = bigramProbabilities.get(
|
||||||
final int unigramProbability = wordProbabilities.get(word1);
|
new Pair<String, String>(word0, word1));
|
||||||
final int bigramProbablity = binaryDictionary.calculateProbability(
|
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
|
||||||
unigramProbability, bigramProbabilityDelta);
|
}
|
||||||
assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1082,7 +1109,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
final String word0 = words.get(word0Index);
|
final String word0 = words.get(word0Index);
|
||||||
final String word1 = words.get(word1Index);
|
final String word1 = words.get(word1Index);
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
|
||||||
|
final int bigramProbability =
|
||||||
|
unigramProbability + random.nextInt(0xFF - unigramProbability);
|
||||||
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
||||||
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
||||||
|
@ -1113,12 +1142,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||||
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||||
assertTrue(bigramWord1s.contains(word1));
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
|
|
||||||
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram);
|
if (canCheckBigramProbability(formatVersion)) {
|
||||||
final int bigramProbablity = binaryDictionary.calculateProbability(
|
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
|
||||||
unigramProbability, bigramProbabilityDelta);
|
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
|
||||||
assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
|
}
|
||||||
bigramSet.remove(bigram);
|
bigramSet.remove(bigram);
|
||||||
}
|
}
|
||||||
token = result.mNextToken;
|
token = result.mNextToken;
|
||||||
|
@ -1286,7 +1314,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
final int unigramProbability = 100;
|
final int unigramProbability = 100;
|
||||||
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
||||||
addUnigramWord(binaryDictionary, "bbb", unigramProbability);
|
addUnigramWord(binaryDictionary, "bbb", unigramProbability);
|
||||||
final int bigramProbability = 10;
|
final int bigramProbability = 150;
|
||||||
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
|
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
|
||||||
final int shortcutProbability = 10;
|
final int shortcutProbability = 10;
|
||||||
binaryDictionary.addUnigramWord("ccc", unigramProbability, "xxx", shortcutProbability,
|
binaryDictionary.addUnigramWord("ccc", unigramProbability, "xxx", shortcutProbability,
|
||||||
|
@ -1303,7 +1331,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
|
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
|
||||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
||||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
|
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
|
||||||
// TODO: Add tests for bigram frequency when the implementation gets ready.
|
if (canCheckBigramProbability(toFormatVersion)) {
|
||||||
|
assertEquals(bigramProbability, binaryDictionary.getBigramProbability("aaa", "bbb"));
|
||||||
|
}
|
||||||
assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
|
assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
|
||||||
WordProperty wordProperty = binaryDictionary.getWordProperty("ccc");
|
WordProperty wordProperty = binaryDictionary.getWordProperty("ccc");
|
||||||
assertEquals(1, wordProperty.mShortcutTargets.size());
|
assertEquals(1, wordProperty.mShortcutTargets.size());
|
||||||
|
@ -1362,7 +1392,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
final String word0 = words.get(word0Index);
|
final String word0 = words.get(word0Index);
|
||||||
final String word1 = words.get(word1Index);
|
final String word1 = words.get(word1Index);
|
||||||
final int bigramProbability = random.nextInt(0xF);
|
final int unigramProbability = unigramProbabilities.get(word1);
|
||||||
|
final int bigramProbability =
|
||||||
|
random.nextInt(0xFF - unigramProbability) + unigramProbability;
|
||||||
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
binaryDictionary.addBigramWords(word0, word1, bigramProbability,
|
||||||
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
|
@ -1381,7 +1413,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
||||||
|
|
||||||
for (final Pair<String, String> bigram : bigrams) {
|
for (final Pair<String, String> bigram : bigrams) {
|
||||||
// TODO: Add tests for bigram frequency when the implementation gets ready.
|
if (canCheckBigramProbability(toFormatVersion)) {
|
||||||
|
assertEquals((int)bigramProbabilities.get(bigram),
|
||||||
|
binaryDictionary.getBigramProbability(bigram.first, bigram.second));
|
||||||
|
}
|
||||||
assertTrue(binaryDictionary.isValidBigram(bigram.first, bigram.second));
|
assertTrue(binaryDictionary.isValidBigram(bigram.first, bigram.second));
|
||||||
}
|
}
|
||||||
assertEquals(bigramProbabilities.size(), Integer.parseInt(
|
assertEquals(bigramProbabilities.size(), Integer.parseInt(
|
||||||
|
|
Loading…
Reference in a new issue