2013-09-30 11:53:35 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2013 The Android Open Source Project
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package com.android.inputmethod.latin;
|
|
|
|
|
|
|
|
import android.test.AndroidTestCase;
|
|
|
|
import android.test.suitebuilder.annotation.LargeTest;
|
2013-10-08 03:43:58 +00:00
|
|
|
import android.util.Pair;
|
2013-09-30 11:53:35 +00:00
|
|
|
|
2014-06-25 05:14:37 +00:00
|
|
|
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
|
2014-03-27 06:30:32 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
2013-10-07 08:05:24 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
2014-01-29 11:19:24 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.DictDecoder;
|
2014-02-04 12:36:04 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
2013-09-30 11:53:35 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
2014-01-29 11:19:24 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
|
|
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
|
|
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
2014-03-05 09:19:34 +00:00
|
|
|
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
|
2013-12-13 08:09:16 +00:00
|
|
|
import com.android.inputmethod.latin.utils.FileUtils;
|
2014-02-04 14:51:05 +00:00
|
|
|
import com.android.inputmethod.latin.utils.LocaleUtils;
|
2013-09-30 11:53:35 +00:00
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.IOException;
|
2013-10-07 08:05:24 +00:00
|
|
|
import java.util.ArrayList;
|
2013-09-30 11:53:35 +00:00
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.Locale;
|
|
|
|
import java.util.Map;
|
2013-10-07 08:05:24 +00:00
|
|
|
import java.util.Random;
|
2013-12-13 08:09:16 +00:00
|
|
|
import java.util.concurrent.TimeUnit;
|
2013-09-30 11:53:35 +00:00
|
|
|
|
|
|
|
@LargeTest
|
|
|
|
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|
|
|
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
|
|
|
private static final String TEST_LOCALE = "test";
|
|
|
|
private static final int DUMMY_PROBABILITY = 0;
|
2014-05-14 11:37:19 +00:00
|
|
|
private static final int[] DICT_FORMAT_VERSIONS =
|
|
|
|
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
|
2013-09-30 11:53:35 +00:00
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
private int mCurrentTime = 0;
|
|
|
|
|
2013-09-30 11:53:35 +00:00
|
|
|
@Override
|
|
|
|
protected void setUp() throws Exception {
|
|
|
|
super.setUp();
|
2013-12-13 08:09:16 +00:00
|
|
|
mCurrentTime = 0;
|
2013-09-30 11:53:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected void tearDown() throws Exception {
|
2014-01-15 22:55:57 +00:00
|
|
|
stopTestModeInNativeCode();
|
2014-02-13 10:31:31 +00:00
|
|
|
super.tearDown();
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
2014-05-23 10:58:58 +00:00
|
|
|
private static boolean supportsBeginningOfSentence(final int formatVersion) {
|
2014-05-23 11:20:56 +00:00
|
|
|
return formatVersion > FormatSpec.VERSION401;
|
2014-05-23 10:58:58 +00:00
|
|
|
}
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
|
|
|
|
final int probability) {
|
2014-05-21 02:15:38 +00:00
|
|
|
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
|
2014-05-23 10:58:58 +00:00
|
|
|
false /* isBeginningOfSentence */, false /* isNotAWord */,
|
|
|
|
false /* isBlacklisted */, mCurrentTime /* timestamp */);
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
|
|
|
|
final String word1, final int probability) {
|
2014-06-25 05:14:37 +00:00
|
|
|
binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability,
|
2013-12-13 08:09:16 +00:00
|
|
|
mCurrentTime /* timestamp */);
|
2013-09-30 11:53:35 +00:00
|
|
|
}
|
|
|
|
|
2014-05-21 02:15:38 +00:00
|
|
|
private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
|
|
|
|
final String word0, final String word1) {
|
2014-06-25 05:14:37 +00:00
|
|
|
return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1);
|
2014-05-21 02:15:38 +00:00
|
|
|
}
|
|
|
|
|
2013-09-30 11:53:35 +00:00
|
|
|
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
2014-03-14 03:23:24 +00:00
|
|
|
// 30 days.
|
|
|
|
final int timeToElapse = (int)TimeUnit.SECONDS.convert(30, TimeUnit.DAYS);
|
2013-12-13 08:09:16 +00:00
|
|
|
mCurrentTime += timeToElapse;
|
2014-01-15 22:55:57 +00:00
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
2013-12-13 08:09:16 +00:00
|
|
|
binaryDictionary.flushWithGC();
|
2013-09-30 11:53:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
|
2014-03-14 03:23:24 +00:00
|
|
|
// 365 days.
|
|
|
|
final int timeToElapse = (int)TimeUnit.SECONDS.convert(365, TimeUnit.DAYS);
|
2013-12-13 08:09:16 +00:00
|
|
|
mCurrentTime += timeToElapse;
|
2014-01-15 22:55:57 +00:00
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
2013-12-13 08:09:16 +00:00
|
|
|
binaryDictionary.flushWithGC();
|
2013-09-30 11:53:35 +00:00
|
|
|
}
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
private File createEmptyDictionaryAndGetFile(final String dictId,
|
|
|
|
final int formatVersion) throws IOException {
|
2014-05-12 10:21:06 +00:00
|
|
|
if (formatVersion == FormatSpec.VERSION4
|
2014-05-14 11:37:19 +00:00
|
|
|
|| formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
|
|
|
|
|| formatVersion == FormatSpec.VERSION4_DEV) {
|
2014-05-12 10:21:06 +00:00
|
|
|
return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
|
2013-12-13 08:09:16 +00:00
|
|
|
} else {
|
|
|
|
throw new IOException("Dictionary format version " + formatVersion
|
|
|
|
+ " is not supported.");
|
|
|
|
}
|
|
|
|
}
|
2014-01-15 22:55:57 +00:00
|
|
|
|
2014-05-12 10:21:06 +00:00
|
|
|
private File createEmptyVer4DictionaryAndGetFile(final String dictId, final int formatVersion)
|
|
|
|
throws IOException {
|
2013-12-13 08:09:16 +00:00
|
|
|
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
2013-09-30 11:53:35 +00:00
|
|
|
getContext().getCacheDir());
|
2013-12-13 08:09:16 +00:00
|
|
|
FileUtils.deleteRecursively(file);
|
2014-05-23 11:18:17 +00:00
|
|
|
Map<String, String> attributeMap = new HashMap<>();
|
2014-02-04 12:36:04 +00:00
|
|
|
attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, dictId);
|
|
|
|
attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
|
2014-01-29 11:19:24 +00:00
|
|
|
String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
|
2014-02-04 12:36:04 +00:00
|
|
|
attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY,
|
|
|
|
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
|
|
|
attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY,
|
|
|
|
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
2014-05-12 10:21:06 +00:00
|
|
|
if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
|
2014-02-04 14:51:05 +00:00
|
|
|
LocaleUtils.constructLocaleFromString(TEST_LOCALE), attributeMap)) {
|
2013-09-30 11:53:35 +00:00
|
|
|
return file;
|
|
|
|
} else {
|
2013-12-12 06:08:10 +00:00
|
|
|
throw new IOException("Empty dictionary " + file.getAbsolutePath()
|
2014-05-14 11:37:19 +00:00
|
|
|
+ " cannot be created. Foramt version: " + formatVersion);
|
2013-09-30 11:53:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-15 22:55:57 +00:00
|
|
|
private static int setCurrentTimeForTestMode(final int currentTime) {
|
2014-03-05 09:19:34 +00:00
|
|
|
return BinaryDictionaryUtils.setCurrentTimeForTest(currentTime);
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
2014-01-15 22:55:57 +00:00
|
|
|
private static int stopTestModeInNativeCode() {
|
2014-03-05 09:19:34 +00:00
|
|
|
return BinaryDictionaryUtils.setCurrentTimeForTest(-1);
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
2014-01-29 11:19:24 +00:00
|
|
|
public void testReadDictInJavaSide() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testReadDictInJavaSide(formatVersion);
|
|
|
|
}
|
2014-01-29 11:19:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testReadDictInJavaSide(final int formatVersion) {
|
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "ab", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "a", "aaa", DUMMY_PROBABILITY);
|
|
|
|
binaryDictionary.flushWithGC();
|
|
|
|
binaryDictionary.close();
|
|
|
|
|
2014-03-27 06:30:32 +00:00
|
|
|
final DictDecoder dictDecoder =
|
|
|
|
BinaryDictIOUtils.getDictDecoder(dictFile, 0, dictFile.length());
|
2014-01-29 11:19:24 +00:00
|
|
|
try {
|
2014-02-14 09:31:41 +00:00
|
|
|
final FusionDictionary dict =
|
|
|
|
dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
2014-01-29 11:19:24 +00:00
|
|
|
PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "a");
|
|
|
|
assertNotNull(ptNode);
|
|
|
|
assertTrue(ptNode.isTerminal());
|
|
|
|
assertNotNull(ptNode.getBigram("aaa"));
|
|
|
|
ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "ab");
|
|
|
|
assertNotNull(ptNode);
|
|
|
|
assertTrue(ptNode.isTerminal());
|
|
|
|
ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa");
|
|
|
|
assertNotNull(ptNode);
|
|
|
|
assertTrue(ptNode.isTerminal());
|
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while reading dictionary: " + e);
|
|
|
|
} catch (UnsupportedFormatException e) {
|
|
|
|
fail("Unsupported format: " + e);
|
|
|
|
}
|
|
|
|
dictFile.delete();
|
|
|
|
}
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
public void testControlCurrentTime() {
|
|
|
|
final int TEST_COUNT = 1000;
|
|
|
|
final long seed = System.currentTimeMillis();
|
|
|
|
final Random random = new Random(seed);
|
2014-01-15 22:55:57 +00:00
|
|
|
final int startTime = stopTestModeInNativeCode();
|
2013-12-13 08:09:16 +00:00
|
|
|
for (int i = 0; i < TEST_COUNT; i++) {
|
|
|
|
final int currentTime = random.nextInt(Integer.MAX_VALUE);
|
2014-01-15 22:55:57 +00:00
|
|
|
final int currentTimeInNativeCode = setCurrentTimeForTestMode(currentTime);
|
|
|
|
assertEquals(currentTime, currentTimeInNativeCode);
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
2014-01-15 22:55:57 +00:00
|
|
|
final int endTime = stopTestModeInNativeCode();
|
2013-12-13 08:09:16 +00:00
|
|
|
final int MAX_ALLOWED_ELAPSED_TIME = 10;
|
|
|
|
assertTrue(startTime <= endTime && endTime <= startTime + MAX_ALLOWED_ELAPSED_TIME);
|
|
|
|
}
|
|
|
|
|
2013-09-30 11:53:35 +00:00
|
|
|
public void testAddValidAndInvalidWords() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testAddValidAndInvalidWords(formatVersion);
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testAddValidAndInvalidWords(final int formatVersion) {
|
2013-09-30 11:53:35 +00:00
|
|
|
File dictFile = null;
|
|
|
|
try {
|
2013-12-13 08:09:16 +00:00
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
2013-09-30 11:53:35 +00:00
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "a", Dictionary.NOT_A_PROBABILITY);
|
2013-09-30 11:53:35 +00:00
|
|
|
assertFalse(binaryDictionary.isValidWord("a"));
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "a", Dictionary.NOT_A_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "a", Dictionary.NOT_A_PROBABILITY);
|
2013-09-30 11:53:35 +00:00
|
|
|
assertTrue(binaryDictionary.isValidWord("a"));
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
|
2013-09-30 11:53:35 +00:00
|
|
|
assertTrue(binaryDictionary.isValidWord("b"));
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addBigramWords(binaryDictionary, "a", "b", Dictionary.NOT_A_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
2013-12-13 08:09:16 +00:00
|
|
|
addBigramWords(binaryDictionary, "a", "b", Dictionary.NOT_A_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
2013-09-30 11:53:35 +00:00
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "c", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "a", "c", DUMMY_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "a", "c"));
|
2013-09-30 11:53:35 +00:00
|
|
|
|
2013-10-08 03:43:58 +00:00
|
|
|
// Add bigrams of not valid unigrams.
|
2013-12-13 08:09:16 +00:00
|
|
|
addBigramWords(binaryDictionary, "x", "y", Dictionary.NOT_A_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertFalse(isValidBigram(binaryDictionary, "x", "y"));
|
2013-12-13 08:09:16 +00:00
|
|
|
addBigramWords(binaryDictionary, "x", "y", DUMMY_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertFalse(isValidBigram(binaryDictionary, "x", "y"));
|
2013-10-08 03:43:58 +00:00
|
|
|
|
2013-09-30 11:53:35 +00:00
|
|
|
binaryDictionary.close();
|
|
|
|
dictFile.delete();
|
|
|
|
}
|
|
|
|
|
|
|
|
public void testDecayingProbability() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testDecayingProbability(formatVersion);
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testDecayingProbability(final int formatVersion) {
|
2013-09-30 11:53:35 +00:00
|
|
|
File dictFile = null;
|
|
|
|
try {
|
2013-12-13 08:09:16 +00:00
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
2013-09-30 11:53:35 +00:00
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
2013-09-30 11:53:35 +00:00
|
|
|
assertTrue(binaryDictionary.isValidWord("a"));
|
|
|
|
forcePassingShortTime(binaryDictionary);
|
|
|
|
assertFalse(binaryDictionary.isValidWord("a"));
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
assertTrue(binaryDictionary.isValidWord("a"));
|
2013-09-30 11:53:35 +00:00
|
|
|
forcePassingShortTime(binaryDictionary);
|
|
|
|
assertTrue(binaryDictionary.isValidWord("a"));
|
|
|
|
forcePassingLongTime(binaryDictionary);
|
|
|
|
assertFalse(binaryDictionary.isValidWord("a"));
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
2013-09-30 11:53:35 +00:00
|
|
|
forcePassingShortTime(binaryDictionary);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
2013-09-30 11:53:35 +00:00
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
2013-09-30 11:53:35 +00:00
|
|
|
forcePassingShortTime(binaryDictionary);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
2013-09-30 11:53:35 +00:00
|
|
|
forcePassingLongTime(binaryDictionary);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
2013-09-30 11:53:35 +00:00
|
|
|
|
|
|
|
binaryDictionary.close();
|
|
|
|
dictFile.delete();
|
|
|
|
}
|
2013-10-07 08:05:24 +00:00
|
|
|
|
|
|
|
public void testAddManyUnigramsToDecayingDict() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testAddManyUnigramsToDecayingDict(formatVersion);
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testAddManyUnigramsToDecayingDict(final int formatVersion) {
|
2013-10-07 08:05:24 +00:00
|
|
|
final int unigramCount = 30000;
|
|
|
|
final int unigramTypedCount = 100000;
|
|
|
|
final int codePointSetSize = 50;
|
|
|
|
final long seed = System.currentTimeMillis();
|
|
|
|
final Random random = new Random(seed);
|
|
|
|
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
2013-12-13 08:09:16 +00:00
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
2013-10-07 08:05:24 +00:00
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
2014-01-15 22:55:57 +00:00
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
2013-10-07 08:05:24 +00:00
|
|
|
|
|
|
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
2014-05-23 11:18:17 +00:00
|
|
|
final ArrayList<String> words = new ArrayList<>();
|
2013-10-07 08:05:24 +00:00
|
|
|
|
|
|
|
for (int i = 0; i < unigramCount; i++) {
|
|
|
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
|
|
|
words.add(word);
|
|
|
|
}
|
|
|
|
|
|
|
|
final int maxUnigramCount = Integer.parseInt(
|
2014-01-15 22:55:57 +00:00
|
|
|
binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
2013-10-07 08:05:24 +00:00
|
|
|
for (int i = 0; i < unigramTypedCount; i++) {
|
|
|
|
final String word = words.get(random.nextInt(words.size()));
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
|
2013-10-07 08:05:24 +00:00
|
|
|
|
|
|
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
|
|
|
final int unigramCountBeforeGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-07 08:05:24 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
|
|
|
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
2013-12-13 08:09:16 +00:00
|
|
|
forcePassingShortTime(binaryDictionary);
|
2013-10-07 08:05:24 +00:00
|
|
|
}
|
|
|
|
final int unigramCountAfterGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-07 08:05:24 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
|
|
|
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-15 22:55:57 +00:00
|
|
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-07 08:05:24 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
|
2014-01-15 22:55:57 +00:00
|
|
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-07 08:05:24 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
|
2013-12-13 08:09:16 +00:00
|
|
|
forcePassingLongTime(binaryDictionary);
|
2014-01-15 22:55:57 +00:00
|
|
|
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
|
|
|
}
|
|
|
|
|
|
|
|
public void testOverflowUnigrams() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testOverflowUnigrams(formatVersion);
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testOverflowUnigrams(final int formatVersion) {
|
|
|
|
final int unigramCount = 20000;
|
2014-03-14 03:23:24 +00:00
|
|
|
final int eachUnigramTypedCount = 2;
|
2013-12-13 08:09:16 +00:00
|
|
|
final int strongUnigramTypedCount = 20;
|
|
|
|
final int weakUnigramTypedCount = 1;
|
|
|
|
final int codePointSetSize = 50;
|
|
|
|
final long seed = System.currentTimeMillis();
|
|
|
|
final Random random = new Random(seed);
|
|
|
|
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
2014-01-15 22:55:57 +00:00
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
2013-12-13 08:09:16 +00:00
|
|
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
|
|
|
|
|
|
|
final String strong = "strong";
|
|
|
|
final String weak = "weak";
|
|
|
|
for (int j = 0; j < strongUnigramTypedCount; j++) {
|
|
|
|
addUnigramWord(binaryDictionary, strong, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
for (int j = 0; j < weakUnigramTypedCount; j++) {
|
|
|
|
addUnigramWord(binaryDictionary, weak, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
assertTrue(binaryDictionary.isValidWord(strong));
|
|
|
|
assertTrue(binaryDictionary.isValidWord(weak));
|
|
|
|
|
|
|
|
for (int i = 0; i < unigramCount; i++) {
|
|
|
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
|
|
|
for (int j = 0; j < eachUnigramTypedCount; j++) {
|
|
|
|
addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
|
|
|
final int unigramCountBeforeGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
|
|
|
assertTrue(binaryDictionary.isValidWord(strong));
|
|
|
|
assertTrue(binaryDictionary.isValidWord(weak));
|
|
|
|
binaryDictionary.flushWithGC();
|
|
|
|
final int unigramCountAfterGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
|
|
|
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
|
|
|
assertFalse(binaryDictionary.isValidWord(weak));
|
|
|
|
assertTrue(binaryDictionary.isValidWord(strong));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-10-07 08:05:24 +00:00
|
|
|
}
|
2013-10-08 03:43:58 +00:00
|
|
|
|
|
|
|
public void testAddManyBigramsToDecayingDict() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testAddManyBigramsToDecayingDict(formatVersion);
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testAddManyBigramsToDecayingDict(final int formatVersion) {
|
2013-10-08 03:43:58 +00:00
|
|
|
final int unigramCount = 5000;
|
|
|
|
final int bigramCount = 30000;
|
|
|
|
final int bigramTypedCount = 100000;
|
|
|
|
final int codePointSetSize = 50;
|
|
|
|
final long seed = System.currentTimeMillis();
|
|
|
|
final Random random = new Random(seed);
|
|
|
|
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
2013-12-13 08:09:16 +00:00
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
2013-10-08 03:43:58 +00:00
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
2014-01-15 22:55:57 +00:00
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
2013-10-08 03:43:58 +00:00
|
|
|
|
|
|
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
2014-05-23 11:18:17 +00:00
|
|
|
final ArrayList<String> words = new ArrayList<>();
|
|
|
|
final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
|
2013-10-08 03:43:58 +00:00
|
|
|
|
|
|
|
for (int i = 0; i < unigramCount; ++i) {
|
|
|
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
|
|
|
words.add(word);
|
|
|
|
}
|
|
|
|
for (int i = 0; i < bigramCount; ++i) {
|
|
|
|
final int word0Index = random.nextInt(words.size());
|
|
|
|
int word1Index = random.nextInt(words.size() - 1);
|
|
|
|
if (word1Index >= word0Index) {
|
|
|
|
word1Index += 1;
|
|
|
|
}
|
|
|
|
final String word0 = words.get(word0Index);
|
|
|
|
final String word1 = words.get(word1Index);
|
2014-05-23 11:18:17 +00:00
|
|
|
final Pair<String, String> bigram = new Pair<>(word0, word1);
|
2013-10-08 03:43:58 +00:00
|
|
|
bigrams.add(bigram);
|
|
|
|
}
|
|
|
|
|
|
|
|
final int maxBigramCount = Integer.parseInt(
|
2014-01-15 22:55:57 +00:00
|
|
|
binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
|
2013-10-08 03:43:58 +00:00
|
|
|
for (int i = 0; i < bigramTypedCount; ++i) {
|
|
|
|
final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
|
2013-12-13 08:09:16 +00:00
|
|
|
addUnigramWord(binaryDictionary, bigram.first, DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, bigram.second, DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, bigram.first, bigram.second, DUMMY_PROBABILITY);
|
2013-10-08 03:43:58 +00:00
|
|
|
|
|
|
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
|
|
|
final int bigramCountBeforeGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-08 03:43:58 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
|
|
|
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
2013-12-13 08:09:16 +00:00
|
|
|
forcePassingShortTime(binaryDictionary);
|
2013-10-08 03:43:58 +00:00
|
|
|
}
|
|
|
|
final int bigramCountAfterGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-08 03:43:58 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
|
|
|
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-15 22:55:57 +00:00
|
|
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-08 03:43:58 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
|
2014-01-15 22:55:57 +00:00
|
|
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-10-08 03:43:58 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
|
2013-12-13 08:09:16 +00:00
|
|
|
forcePassingLongTime(binaryDictionary);
|
2014-01-15 22:55:57 +00:00
|
|
|
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
|
|
|
}
|
|
|
|
|
|
|
|
public void testOverflowBigrams() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testOverflowBigrams(formatVersion);
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testOverflowBigrams(final int formatVersion) {
|
|
|
|
final int bigramCount = 20000;
|
|
|
|
final int unigramCount = 1000;
|
|
|
|
final int unigramTypedCount = 20;
|
2014-03-14 03:23:24 +00:00
|
|
|
final int eachBigramTypedCount = 2;
|
2013-12-13 08:09:16 +00:00
|
|
|
final int strongBigramTypedCount = 20;
|
|
|
|
final int weakBigramTypedCount = 1;
|
|
|
|
final int codePointSetSize = 50;
|
|
|
|
final long seed = System.currentTimeMillis();
|
|
|
|
final Random random = new Random(seed);
|
|
|
|
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
2014-01-15 22:55:57 +00:00
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
2013-12-13 08:09:16 +00:00
|
|
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
|
|
|
|
2014-05-23 11:18:17 +00:00
|
|
|
final ArrayList<String> words = new ArrayList<>();
|
2013-12-13 08:09:16 +00:00
|
|
|
for (int i = 0; i < unigramCount; i++) {
|
|
|
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
|
|
|
words.add(word);
|
|
|
|
for (int j = 0; j < unigramTypedCount; j++) {
|
|
|
|
addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
final String strong = "strong";
|
|
|
|
final String weak = "weak";
|
|
|
|
final String target = "target";
|
|
|
|
for (int j = 0; j < unigramTypedCount; j++) {
|
|
|
|
addUnigramWord(binaryDictionary, strong, DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, weak, DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, target, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
binaryDictionary.flushWithGC();
|
|
|
|
for (int j = 0; j < strongBigramTypedCount; j++) {
|
|
|
|
addBigramWords(binaryDictionary, strong, target, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
for (int j = 0; j < weakBigramTypedCount; j++) {
|
|
|
|
addBigramWords(binaryDictionary, weak, target, DUMMY_PROBABILITY);
|
|
|
|
}
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, strong, target));
|
|
|
|
assertTrue(isValidBigram(binaryDictionary, weak, target));
|
2013-12-13 08:09:16 +00:00
|
|
|
|
|
|
|
for (int i = 0; i < bigramCount; i++) {
|
|
|
|
final int word0Index = random.nextInt(words.size());
|
|
|
|
final String word0 = words.get(word0Index);
|
|
|
|
final int index = random.nextInt(words.size() - 1);
|
|
|
|
final int word1Index = (index >= word0Index) ? index + 1 : index;
|
|
|
|
final String word1 = words.get(word1Index);
|
|
|
|
|
|
|
|
for (int j = 0; j < eachBigramTypedCount; j++) {
|
|
|
|
addBigramWords(binaryDictionary, word0, word1, DUMMY_PROBABILITY);
|
|
|
|
}
|
|
|
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
|
|
|
final int bigramCountBeforeGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
|
|
|
binaryDictionary.flushWithGC();
|
|
|
|
final int bigramCountAfterGC =
|
2014-01-15 22:55:57 +00:00
|
|
|
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
2013-12-13 08:09:16 +00:00
|
|
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
|
|
|
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, strong, target));
|
|
|
|
assertFalse(isValidBigram(binaryDictionary, weak, target));
|
2013-12-13 08:09:16 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-10-08 03:43:58 +00:00
|
|
|
}
|
2014-05-12 10:21:06 +00:00
|
|
|
|
|
|
|
public void testDictMigration() {
|
2014-05-14 11:37:19 +00:00
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
|
|
|
|
}
|
2014-05-12 10:21:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
|
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
|
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
|
|
|
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
|
|
|
|
assertTrue(binaryDictionary.isValidWord("aaa"));
|
|
|
|
addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
|
|
|
|
assertFalse(binaryDictionary.isValidWord("bbb"));
|
|
|
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
|
|
|
addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
|
2014-05-12 16:28:30 +00:00
|
|
|
addUnigramWord(binaryDictionary, "abc", DUMMY_PROBABILITY);
|
|
|
|
addBigramWords(binaryDictionary, "aaa", "abc", DUMMY_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "aaa", "abc"));
|
2014-05-12 16:28:30 +00:00
|
|
|
addBigramWords(binaryDictionary, "aaa", "bbb", Dictionary.NOT_A_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb"));
|
2014-05-12 10:21:06 +00:00
|
|
|
|
|
|
|
assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
|
|
|
|
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
|
|
|
|
assertTrue(binaryDictionary.isValidDictionary());
|
|
|
|
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
|
|
|
|
assertTrue(binaryDictionary.isValidWord("aaa"));
|
|
|
|
assertFalse(binaryDictionary.isValidWord("bbb"));
|
|
|
|
assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc"));
|
|
|
|
addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
|
|
|
|
assertTrue(binaryDictionary.isValidWord("bbb"));
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "aaa", "abc"));
|
|
|
|
assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb"));
|
2014-05-12 16:28:30 +00:00
|
|
|
addBigramWords(binaryDictionary, "aaa", "bbb", Dictionary.NOT_A_PROBABILITY);
|
2014-05-21 02:15:38 +00:00
|
|
|
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
|
2014-05-12 10:21:06 +00:00
|
|
|
binaryDictionary.close();
|
|
|
|
dictFile.delete();
|
|
|
|
}
|
2014-05-23 10:58:58 +00:00
|
|
|
|
|
|
|
public void testBeginningOfSentence() {
|
|
|
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
|
|
|
if (supportsBeginningOfSentence(formatVersion)) {
|
|
|
|
testBeginningOfSentence(formatVersion);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private void testBeginningOfSentence(final int formatVersion) {
|
|
|
|
setCurrentTimeForTestMode(mCurrentTime);
|
|
|
|
File dictFile = null;
|
|
|
|
try {
|
|
|
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
|
|
|
} catch (IOException e) {
|
|
|
|
fail("IOException while writing an initial dictionary : " + e);
|
|
|
|
}
|
|
|
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
|
|
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
|
|
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
|
|
|
|
|
|
|
binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
|
|
|
|
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
|
|
|
|
true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
|
|
|
|
mCurrentTime);
|
|
|
|
final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
|
|
|
|
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
|
|
|
|
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
|
|
|
|
mCurrentTime);
|
|
|
|
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
|
|
|
|
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
|
|
|
|
mCurrentTime);
|
|
|
|
addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
|
|
|
|
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
|
|
|
|
mCurrentTime);
|
|
|
|
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
|
|
|
|
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
|
|
|
|
|
|
|
|
forcePassingLongTime(binaryDictionary);
|
|
|
|
assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
|
|
|
|
assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
|
|
|
|
|
|
|
|
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
|
|
|
|
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
|
|
|
|
mCurrentTime);
|
|
|
|
addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
|
|
|
|
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
|
|
|
|
mCurrentTime);
|
|
|
|
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
|
|
|
|
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
|
|
|
|
binaryDictionary.close();
|
|
|
|
dictFile.delete();
|
|
|
|
}
|
2013-09-30 11:53:35 +00:00
|
|
|
}
|