am 7d7f0820: Stop waking up to decay dynamic dictionaries.
* commit '7d7f082075768c03e4b1d4b84ba2e6ef6cba1132': Stop waking up to decay dynamic dictionaries.
This commit is contained in:
commit
1512af3662
10 changed files with 47 additions and 1116 deletions
|
@ -133,13 +133,6 @@
|
|||
</intent-filter>
|
||||
</receiver>
|
||||
|
||||
<receiver android:name=".personalization.DictionaryDecayBroadcastReciever"
|
||||
android:exported="false">
|
||||
<intent-filter>
|
||||
<action android:name="com.android.inputmethod.latin.personalization.DICT_DECAY" />
|
||||
</intent-filter>
|
||||
</receiver>
|
||||
|
||||
<receiver android:name=".DictionaryPackInstallBroadcastReceiver" android:exported="false">
|
||||
<intent-filter>
|
||||
<action android:name="com.android.inputmethod.dictionarypack.aosp.UNKNOWN_CLIENT" />
|
||||
|
|
|
@ -259,7 +259,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
/**
|
||||
* Check whether GC is needed and run GC if required.
|
||||
*/
|
||||
protected void runGCIfRequired(final boolean mindsBlockByGC) {
|
||||
public void runGCIfRequired(final boolean mindsBlockByGC) {
|
||||
asyncExecuteTaskWithWriteLock(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
|
|
@ -73,7 +73,6 @@ import com.android.inputmethod.latin.common.InputPointers;
|
|||
import com.android.inputmethod.latin.define.DebugFlags;
|
||||
import com.android.inputmethod.latin.define.ProductionFlags;
|
||||
import com.android.inputmethod.latin.inputlogic.InputLogic;
|
||||
import com.android.inputmethod.latin.personalization.DictionaryDecayBroadcastReciever;
|
||||
import com.android.inputmethod.latin.personalization.PersonalizationHelper;
|
||||
import com.android.inputmethod.latin.settings.Settings;
|
||||
import com.android.inputmethod.latin.settings.SettingsActivity;
|
||||
|
@ -584,7 +583,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
dictDumpFilter.addAction(DictionaryDumpBroadcastReceiver.DICTIONARY_DUMP_INTENT_ACTION);
|
||||
registerReceiver(mDictionaryDumpBroadcastReceiver, dictDumpFilter);
|
||||
|
||||
DictionaryDecayBroadcastReciever.setUpIntervalAlarmForDictionaryDecaying(this);
|
||||
StatsUtils.onCreate(mSettings.getCurrent(), mRichImm);
|
||||
}
|
||||
|
||||
|
@ -704,7 +702,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
unregisterReceiver(mDictionaryPackInstallReceiver);
|
||||
unregisterReceiver(mDictionaryDumpBroadcastReceiver);
|
||||
mStatsUtilsManager.onDestroy(this /* context */);
|
||||
DictionaryDecayBroadcastReciever.cancelIntervalAlarmForDictionaryDecaying(this);
|
||||
super.onDestroy();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.personalization;
|
||||
|
||||
import android.content.Context;
|
||||
|
||||
import com.android.inputmethod.latin.Dictionary;
|
||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This class is a base class of a dictionary that supports decaying for the personalized language
|
||||
* model.
|
||||
*/
|
||||
public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
|
||||
private static final boolean DBG_DUMP_ON_CLOSE = false;
|
||||
|
||||
/** Any pair being typed or picked */
|
||||
public static final int FREQUENCY_FOR_TYPED = 2;
|
||||
|
||||
public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
|
||||
public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;
|
||||
|
||||
protected DecayingExpandableBinaryDictionaryBase(final Context context,
|
||||
final String dictName, final Locale locale, final String dictionaryType,
|
||||
final File dictFile) {
|
||||
super(context, dictName, locale, dictionaryType, dictFile);
|
||||
if (mLocale != null && mLocale.toString().length() > 1) {
|
||||
reloadDictionaryIfRequired();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (DBG_DUMP_ON_CLOSE) {
|
||||
dumpAllWordsForDebug();
|
||||
}
|
||||
// Flush pending writes.
|
||||
asyncFlushBinaryDictionary();
|
||||
super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, String> getHeaderAttributeMap() {
|
||||
final Map<String, String> attributeMap = super.getHeaderAttributeMap();
|
||||
attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY,
|
||||
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY,
|
||||
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void loadInitialContentsLocked() {
|
||||
// No initial contents.
|
||||
}
|
||||
|
||||
/* package */ void runGCIfRequired() {
|
||||
runGCIfRequired(false /* mindsBlockByGC */);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidWord(final String word) {
|
||||
// Strings out of this dictionary should not be considered existing words.
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -1,89 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.personalization;
|
||||
|
||||
import android.app.AlarmManager;
|
||||
import android.app.PendingIntent;
|
||||
import android.content.BroadcastReceiver;
|
||||
import android.content.Context;
|
||||
import android.content.Intent;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Broadcast receiver for periodically updating decaying dictionaries.
|
||||
*/
|
||||
public class DictionaryDecayBroadcastReciever extends BroadcastReceiver {
|
||||
/**
|
||||
* The root domain for the personalization.
|
||||
*/
|
||||
private static final String PERSONALIZATION_DOMAIN =
|
||||
"com.android.inputmethod.latin.personalization";
|
||||
|
||||
/**
|
||||
* The action of the intent to tell the time to decay dictionaries.
|
||||
*/
|
||||
private static final String DICTIONARY_DECAY_INTENT_ACTION =
|
||||
PERSONALIZATION_DOMAIN + ".DICT_DECAY";
|
||||
|
||||
/**
|
||||
* Interval to update for decaying dictionaries.
|
||||
*/
|
||||
static final long DICTIONARY_DECAY_INTERVAL_IN_MILLIS = TimeUnit.MINUTES.toMillis(60);
|
||||
|
||||
private static PendingIntent getPendingIntentForDictionaryDecay(final Context context) {
|
||||
final Intent updateIntent = new Intent(DICTIONARY_DECAY_INTENT_ACTION);
|
||||
updateIntent.setClass(context, DictionaryDecayBroadcastReciever.class);
|
||||
return PendingIntent.getBroadcast(context, 0 /* requestCode */,
|
||||
updateIntent, PendingIntent.FLAG_CANCEL_CURRENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up interval alarm for dynamic dictionaries.
|
||||
*/
|
||||
public static void setUpIntervalAlarmForDictionaryDecaying(final Context context) {
|
||||
final AlarmManager alarmManager =
|
||||
(AlarmManager)context.getSystemService(Context.ALARM_SERVICE);
|
||||
if (null == alarmManager) {
|
||||
return;
|
||||
}
|
||||
final long alarmTriggerTimeInMillis =
|
||||
System.currentTimeMillis() + DICTIONARY_DECAY_INTERVAL_IN_MILLIS;
|
||||
alarmManager.setInexactRepeating(AlarmManager.RTC, alarmTriggerTimeInMillis,
|
||||
DICTIONARY_DECAY_INTERVAL_IN_MILLIS, getPendingIntentForDictionaryDecay(context));
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancel interval alarm that has been set up.
|
||||
*/
|
||||
public static void cancelIntervalAlarmForDictionaryDecaying(final Context context) {
|
||||
final AlarmManager alarmManager =
|
||||
(AlarmManager)context.getSystemService(Context.ALARM_SERVICE);
|
||||
if (null == alarmManager) {
|
||||
return;
|
||||
}
|
||||
alarmManager.cancel(getPendingIntentForDictionaryDecay(context));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onReceive(final Context context, final Intent intent) {
|
||||
final String action = intent.getAction();
|
||||
if (action.equals(DICTIONARY_DECAY_INTENT_ACTION)) {
|
||||
PersonalizationHelper.runGCOnAllOpenedUserHistoryDictionaries();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -26,7 +26,6 @@ import java.io.FilenameFilter;
|
|||
import java.lang.ref.SoftReference;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
@ -68,60 +67,28 @@ public class PersonalizationHelper {
|
|||
}
|
||||
}
|
||||
|
||||
private static int sCurrentTimestampForTesting = 0;
|
||||
public static void currentTimeChangedForTesting(final int currentTimestamp) {
|
||||
if (TimeUnit.MILLISECONDS.toSeconds(
|
||||
DictionaryDecayBroadcastReciever.DICTIONARY_DECAY_INTERVAL_IN_MILLIS)
|
||||
< currentTimestamp - sCurrentTimestampForTesting) {
|
||||
runGCOnAllOpenedUserHistoryDictionaries();
|
||||
}
|
||||
}
|
||||
|
||||
public static void runGCOnAllOpenedUserHistoryDictionaries() {
|
||||
runGCOnAllDictionariesIfRequired(sLangUserHistoryDictCache);
|
||||
}
|
||||
|
||||
private static <T extends DecayingExpandableBinaryDictionaryBase>
|
||||
void runGCOnAllDictionariesIfRequired(
|
||||
final ConcurrentHashMap<String, SoftReference<T>> dictionaryMap) {
|
||||
for (final ConcurrentHashMap.Entry<String, SoftReference<T>> entry
|
||||
: dictionaryMap.entrySet()) {
|
||||
final DecayingExpandableBinaryDictionaryBase dict = entry.getValue().get();
|
||||
if (dict != null) {
|
||||
dict.runGCIfRequired();
|
||||
} else {
|
||||
dictionaryMap.remove(entry.getKey());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void removeAllUserHistoryDictionaries(final Context context) {
|
||||
removeAllDictionaries(context, sLangUserHistoryDictCache,
|
||||
UserHistoryDictionary.NAME);
|
||||
}
|
||||
|
||||
private static <T extends DecayingExpandableBinaryDictionaryBase> void removeAllDictionaries(
|
||||
final Context context, final ConcurrentHashMap<String, SoftReference<T>> dictionaryMap,
|
||||
final String dictNamePrefix) {
|
||||
synchronized (dictionaryMap) {
|
||||
for (final ConcurrentHashMap.Entry<String, SoftReference<T>> entry
|
||||
: dictionaryMap.entrySet()) {
|
||||
synchronized (sLangUserHistoryDictCache) {
|
||||
for (final ConcurrentHashMap.Entry<String, SoftReference<UserHistoryDictionary>> entry
|
||||
: sLangUserHistoryDictCache.entrySet()) {
|
||||
if (entry.getValue() != null) {
|
||||
final DecayingExpandableBinaryDictionaryBase dict = entry.getValue().get();
|
||||
final UserHistoryDictionary dict = entry.getValue().get();
|
||||
if (dict != null) {
|
||||
dict.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
dictionaryMap.clear();
|
||||
sLangUserHistoryDictCache.clear();
|
||||
final File filesDir = context.getFilesDir();
|
||||
if (filesDir == null) {
|
||||
Log.e(TAG, "context.getFilesDir() returned null.");
|
||||
return;
|
||||
}
|
||||
if (!FileUtils.deleteFilteredFiles(filesDir, new DictFilter(dictNamePrefix))) {
|
||||
Log.e(TAG, "Cannot remove all existing dictionary files. filesDir: "
|
||||
+ filesDir.getAbsolutePath() + ", dictNamePrefix: " + dictNamePrefix);
|
||||
final boolean filesDeleted = FileUtils.deleteFilteredFiles(
|
||||
filesDir, new DictFilter(UserHistoryDictionary.NAME));
|
||||
if (!filesDeleted) {
|
||||
Log.e(TAG, "Cannot remove dictionary files. filesDir: " + filesDir.getAbsolutePath()
|
||||
+ ", dictNamePrefix: " + UserHistoryDictionary.NAME);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,9 +25,11 @@ import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
|||
import com.android.inputmethod.latin.NgramContext;
|
||||
import com.android.inputmethod.latin.define.DecoderSpecificConstants;
|
||||
import com.android.inputmethod.latin.define.ProductionFlags;
|
||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
@ -37,17 +39,16 @@ import javax.annotation.Nullable;
|
|||
* auto-correction cancellation or manual picks. This allows the keyboard to adapt to the
|
||||
* typist over time.
|
||||
*/
|
||||
public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
|
||||
public class UserHistoryDictionary extends ExpandableBinaryDictionary {
|
||||
static final String NAME = UserHistoryDictionary.class.getSimpleName();
|
||||
|
||||
// TODO: Make this constructor private
|
||||
UserHistoryDictionary(final Context context, final Locale locale,
|
||||
@Nullable final String account) {
|
||||
super(context,
|
||||
getUserHistoryDictName(NAME, locale, null /* dictFile */, account),
|
||||
locale,
|
||||
Dictionary.TYPE_USER_HISTORY,
|
||||
null /* dictFile */);
|
||||
super(context, getUserHistoryDictName(NAME, locale, null /* dictFile */, account), locale, Dictionary.TYPE_USER_HISTORY, null);
|
||||
if (mLocale != null && mLocale.toString().length() > 1) {
|
||||
reloadDictionaryIfRequired();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -103,4 +104,32 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
|
|||
userHistoryDictionary.updateEntriesForWord(ngramContext, word,
|
||||
isValid, 1 /* count */, timestamp);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// Flush pending writes.
|
||||
asyncFlushBinaryDictionary();
|
||||
super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, String> getHeaderAttributeMap() {
|
||||
final Map<String, String> attributeMap = super.getHeaderAttributeMap();
|
||||
attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY,
|
||||
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY,
|
||||
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void loadInitialContentsLocked() {
|
||||
// No initial contents.
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidWord(final String word) {
|
||||
// Strings out of this dictionary should not be considered existing words.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ import com.android.inputmethod.latin.BinaryDictionary;
|
|||
import com.android.inputmethod.latin.common.StringUtils;
|
||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
import com.android.inputmethod.latin.personalization.PersonalizationHelper;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -123,8 +122,6 @@ public final class BinaryDictionaryUtils {
|
|||
*/
|
||||
@UsedForTesting
|
||||
public static int setCurrentTimeForTest(final int currentTime) {
|
||||
final int currentNativeTimestamp = setCurrentTimeForTestNative(currentTime);
|
||||
PersonalizationHelper.currentTimeChangedForTesting(currentNativeTimestamp);
|
||||
return currentNativeTimestamp;
|
||||
return setCurrentTimeForTestNative(currentTime);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,818 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.util.Pair;
|
||||
|
||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||
import com.android.inputmethod.latin.common.CodePointUtils;
|
||||
import com.android.inputmethod.latin.common.FileUtils;
|
||||
import com.android.inputmethod.latin.common.LocaleUtils;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
||||
import com.android.inputmethod.latin.makedict.DictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
|
||||
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@LargeTest
|
||||
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
||||
private static final String TEST_LOCALE = "test";
|
||||
private static final int DUMMY_PROBABILITY = 0;
|
||||
private static final int[] DICT_FORMAT_VERSIONS =
|
||||
new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403, FormatSpec.VERSION4_DEV };
|
||||
private static final String DICTIONARY_ID = "TestDecayingBinaryDictionary";
|
||||
|
||||
private int mCurrentTime = 0;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
mCurrentTime = 0;
|
||||
mDictFilesToBeDeleted.clear();
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown() throws Exception {
|
||||
for (final File dictFile : mDictFilesToBeDeleted) {
|
||||
dictFile.delete();
|
||||
}
|
||||
mDictFilesToBeDeleted.clear();
|
||||
stopTestModeInNativeCode();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
private static boolean supportsCountBasedNgram(final int formatVersion) {
|
||||
return formatVersion >= FormatSpec.VERSION403;
|
||||
}
|
||||
|
||||
private static boolean supportsNgram(final int formatVersion) {
|
||||
return formatVersion >= FormatSpec.VERSION403;
|
||||
}
|
||||
|
||||
private void onInputWord(final BinaryDictionary binaryDictionary, final String word,
|
||||
final boolean isValidWord) {
|
||||
binaryDictionary.updateEntriesForWordWithNgramContext(NgramContext.EMPTY_PREV_WORDS_INFO,
|
||||
word, isValidWord, 1 /* count */, mCurrentTime /* timestamp */);
|
||||
}
|
||||
|
||||
private void onInputWordWithPrevWord(final BinaryDictionary binaryDictionary, final String word,
|
||||
final boolean isValidWord, final String prevWord) {
|
||||
binaryDictionary.updateEntriesForWordWithNgramContext(
|
||||
new NgramContext(new WordInfo(prevWord)), word, isValidWord, 1 /* count */,
|
||||
mCurrentTime /* timestamp */);
|
||||
}
|
||||
|
||||
private void onInputWordWithPrevWords(final BinaryDictionary binaryDictionary,
|
||||
final String word, final boolean isValidWord, final String prevWord,
|
||||
final String prevPrevWord) {
|
||||
binaryDictionary.updateEntriesForWordWithNgramContext(
|
||||
new NgramContext(new WordInfo(prevWord), new WordInfo(prevPrevWord)), word,
|
||||
isValidWord, 1 /* count */, mCurrentTime /* timestamp */);
|
||||
}
|
||||
|
||||
private void onInputWordWithBeginningOfSentenceContext(
|
||||
final BinaryDictionary binaryDictionary, final String word, final boolean isValidWord) {
|
||||
binaryDictionary.updateEntriesForWordWithNgramContext(NgramContext.BEGINNING_OF_SENTENCE,
|
||||
word, isValidWord, 1 /* count */, mCurrentTime /* timestamp */);
|
||||
}
|
||||
|
||||
private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
|
||||
final String word0, final String word1) {
|
||||
return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1);
|
||||
}
|
||||
|
||||
private static boolean isValidTrigram(final BinaryDictionary binaryDictionary,
|
||||
final String word0, final String word1, final String word2) {
|
||||
return binaryDictionary.isValidNgram(
|
||||
new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2);
|
||||
}
|
||||
|
||||
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
||||
// 30 days.
|
||||
final int timeToElapse = (int)TimeUnit.SECONDS.convert(30, TimeUnit.DAYS);
|
||||
mCurrentTime += timeToElapse;
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
binaryDictionary.flushWithGC();
|
||||
}
|
||||
|
||||
private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
|
||||
// 365 days.
|
||||
final int timeToElapse = (int)TimeUnit.SECONDS.convert(365, TimeUnit.DAYS);
|
||||
mCurrentTime += timeToElapse;
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
binaryDictionary.flushWithGC();
|
||||
}
|
||||
|
||||
private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();
|
||||
|
||||
private File createEmptyDictionaryAndGetFile(final int formatVersion) {
|
||||
return createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
|
||||
new HashMap<String, String>());
|
||||
}
|
||||
|
||||
private File createEmptyDictionaryWithAttributeMapAndGetFile(final int formatVersion,
|
||||
final HashMap<String, String> attributeMap) {
|
||||
try {
|
||||
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
mDictFilesToBeDeleted.add(dictFile);
|
||||
return dictFile;
|
||||
} catch (final IOException e) {
|
||||
fail(e.toString());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
|
||||
final HashMap<String, String> attributeMap)
|
||||
throws IOException {
|
||||
final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
|
||||
getContext().getCacheDir());
|
||||
FileUtils.deleteRecursively(file);
|
||||
attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, DICTIONARY_ID);
|
||||
attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
|
||||
String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
|
||||
attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY,
|
||||
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY,
|
||||
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
|
||||
LocaleUtils.constructLocaleFromString(TEST_LOCALE), attributeMap)) {
|
||||
return file;
|
||||
}
|
||||
throw new IOException("Empty dictionary " + file.getAbsolutePath()
|
||||
+ " cannot be created. Foramt version: " + formatVersion);
|
||||
}
|
||||
|
||||
private static BinaryDictionary getBinaryDictionary(final File dictFile) {
|
||||
return new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
}
|
||||
|
||||
private static int setCurrentTimeForTestMode(final int currentTime) {
|
||||
return BinaryDictionaryUtils.setCurrentTimeForTest(currentTime);
|
||||
}
|
||||
|
||||
private static int stopTestModeInNativeCode() {
|
||||
return BinaryDictionaryUtils.setCurrentTimeForTest(-1);
|
||||
}
|
||||
|
||||
public void testReadDictInJavaSide() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testReadDictInJavaSide(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testReadDictInJavaSide(final int formatVersion) {
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "ab", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "aaa", true /* isValidWord */, "a");
|
||||
binaryDictionary.flushWithGC();
|
||||
binaryDictionary.close();
|
||||
|
||||
final DictDecoder dictDecoder =
|
||||
BinaryDictIOUtils.getDictDecoder(dictFile, 0, dictFile.length());
|
||||
try {
|
||||
final FusionDictionary dict =
|
||||
dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
||||
PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "a");
|
||||
assertNotNull(ptNode);
|
||||
assertTrue(ptNode.isTerminal());
|
||||
assertNotNull(ptNode.getBigram("aaa"));
|
||||
ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "ab");
|
||||
assertNotNull(ptNode);
|
||||
assertTrue(ptNode.isTerminal());
|
||||
ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa");
|
||||
assertNotNull(ptNode);
|
||||
assertTrue(ptNode.isTerminal());
|
||||
} catch (IOException e) {
|
||||
fail("IOException while reading dictionary: " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("Unsupported format: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
public void testControlCurrentTime() {
|
||||
final int TEST_COUNT = 1000;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
final int startTime = stopTestModeInNativeCode();
|
||||
for (int i = 0; i < TEST_COUNT; i++) {
|
||||
final int currentTime = random.nextInt(Integer.MAX_VALUE);
|
||||
final int currentTimeInNativeCode = setCurrentTimeForTestMode(currentTime);
|
||||
assertEquals(currentTime, currentTimeInNativeCode);
|
||||
}
|
||||
final int endTime = stopTestModeInNativeCode();
|
||||
final int MAX_ALLOWED_ELAPSED_TIME = 10;
|
||||
assertTrue(startTime <= endTime && endTime <= startTime + MAX_ALLOWED_ELAPSED_TIME);
|
||||
}
|
||||
|
||||
public void testAddValidAndInvalidWords() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testAddValidAndInvalidWords(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testAddValidAndInvalidWords(final int formatVersion) {
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
onInputWord(binaryDictionary, "a", false /* isValidWord */);
|
||||
assertFalse(binaryDictionary.isValidWord("a"));
|
||||
onInputWord(binaryDictionary, "a", false /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "a", false /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidWord("a"));
|
||||
|
||||
onInputWordWithPrevWord(binaryDictionary, "b", false /* isValidWord */, "a");
|
||||
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
||||
onInputWordWithPrevWord(binaryDictionary, "b", false /* isValidWord */, "a");
|
||||
assertTrue(binaryDictionary.isValidWord("b"));
|
||||
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
||||
|
||||
onInputWordWithPrevWord(binaryDictionary, "c", true /* isValidWord */, "a");
|
||||
assertTrue(isValidBigram(binaryDictionary, "a", "c"));
|
||||
|
||||
// Add bigrams of not valid unigrams.
|
||||
onInputWordWithPrevWord(binaryDictionary, "y", false /* isValidWord */, "x");
|
||||
assertFalse(isValidBigram(binaryDictionary, "x", "y"));
|
||||
onInputWordWithPrevWord(binaryDictionary, "y", true /* isValidWord */, "x");
|
||||
assertFalse(isValidBigram(binaryDictionary, "x", "y"));
|
||||
|
||||
if (!supportsNgram(formatVersion)) {
|
||||
return;
|
||||
}
|
||||
|
||||
onInputWordWithPrevWords(binaryDictionary, "c", true /* isValidWord */, "b", "a");
|
||||
assertTrue(isValidTrigram(binaryDictionary, "a", "b", "c"));
|
||||
assertTrue(isValidBigram(binaryDictionary, "b", "c"));
|
||||
onInputWordWithPrevWords(binaryDictionary, "d", false /* isValidWord */, "c", "b");
|
||||
assertFalse(isValidTrigram(binaryDictionary, "b", "c", "d"));
|
||||
assertFalse(isValidBigram(binaryDictionary, "c", "d"));
|
||||
|
||||
onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "b", "a");
|
||||
assertTrue(isValidTrigram(binaryDictionary, "a", "b", "cd"));
|
||||
}
|
||||
|
||||
public void testDecayingProbability() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testDecayingProbability(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testDecayingProbability(final int formatVersion) {
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidWord("a"));
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
if (supportsCountBasedNgram(formatVersion)) {
|
||||
// Count based ngram language model doesn't support decaying based on the elapsed time.
|
||||
assertTrue(binaryDictionary.isValidWord("a"));
|
||||
} else {
|
||||
assertFalse(binaryDictionary.isValidWord("a"));
|
||||
}
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(binaryDictionary.isValidWord("a"));
|
||||
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidWord("a"));
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
assertTrue(binaryDictionary.isValidWord("a"));
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(binaryDictionary.isValidWord("a"));
|
||||
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "b", true /* isValidWord */, "a");
|
||||
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
if (supportsCountBasedNgram(formatVersion)) {
|
||||
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
||||
} else {
|
||||
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
||||
}
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
||||
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "b", true /* isValidWord */, "a");
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "b", true /* isValidWord */, "a");
|
||||
onInputWord(binaryDictionary, "a", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "b", true /* isValidWord */, "a");
|
||||
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
assertTrue(isValidBigram(binaryDictionary, "a", "b"));
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(isValidBigram(binaryDictionary, "a", "b"));
|
||||
|
||||
if (!supportsNgram(formatVersion)) {
|
||||
return;
|
||||
}
|
||||
|
||||
onInputWord(binaryDictionary, "ab", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab");
|
||||
onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab");
|
||||
assertTrue(isValidTrigram(binaryDictionary, "ab", "bc", "cd"));
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(isValidTrigram(binaryDictionary, "ab", "bc", "cd"));
|
||||
|
||||
onInputWord(binaryDictionary, "ab", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab");
|
||||
onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab");
|
||||
onInputWord(binaryDictionary, "ab", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab");
|
||||
onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab");
|
||||
onInputWord(binaryDictionary, "ab", true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab");
|
||||
onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab");
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
assertTrue(isValidTrigram(binaryDictionary, "ab", "bc", "cd"));
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(isValidTrigram(binaryDictionary, "ab", "bc", "cd"));
|
||||
|
||||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
public void testAddManyUnigramsToDecayingDict() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testAddManyUnigramsToDecayingDict(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testAddManyUnigramsToDecayingDict(final int formatVersion) {
|
||||
final int unigramCount = 30000;
|
||||
final int unigramTypedCount = 100000;
|
||||
final int codePointSetSize = 50;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < unigramCount; i++) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
words.add(word);
|
||||
}
|
||||
|
||||
final int maxUnigramCount = Integer.parseInt(
|
||||
binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
||||
for (int i = 0; i < unigramTypedCount; i++) {
|
||||
final String word = words.get(random.nextInt(words.size()));
|
||||
onInputWord(binaryDictionary, word, true /* isValidWord */);
|
||||
|
||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
final int unigramCountBeforeGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
}
|
||||
final int unigramCountAfterGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
|
||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
||||
}
|
||||
|
||||
public void testOverflowUnigrams() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testOverflowUnigrams(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testOverflowUnigrams(final int formatVersion) {
|
||||
final int unigramCount = 20000;
|
||||
final int eachUnigramTypedCount = 2;
|
||||
final int strongUnigramTypedCount = 20;
|
||||
final int weakUnigramTypedCount = 1;
|
||||
final int codePointSetSize = 50;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
|
||||
final String strong = "strong";
|
||||
final String weak = "weak";
|
||||
for (int j = 0; j < strongUnigramTypedCount; j++) {
|
||||
onInputWord(binaryDictionary, strong, true /* isValidWord */);
|
||||
}
|
||||
for (int j = 0; j < weakUnigramTypedCount; j++) {
|
||||
onInputWord(binaryDictionary, weak, true /* isValidWord */);
|
||||
}
|
||||
assertTrue(binaryDictionary.isValidWord(strong));
|
||||
assertTrue(binaryDictionary.isValidWord(weak));
|
||||
|
||||
for (int i = 0; i < unigramCount; i++) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
for (int j = 0; j < eachUnigramTypedCount; j++) {
|
||||
onInputWord(binaryDictionary, word, true /* isValidWord */);
|
||||
}
|
||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
final int unigramCountBeforeGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||
assertTrue(binaryDictionary.isValidWord(strong));
|
||||
assertTrue(binaryDictionary.isValidWord(weak));
|
||||
binaryDictionary.flushWithGC();
|
||||
final int unigramCountAfterGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
||||
assertFalse(binaryDictionary.isValidWord(weak));
|
||||
assertTrue(binaryDictionary.isValidWord(strong));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testAddManyBigramsToDecayingDict() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testAddManyBigramsToDecayingDict(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testAddManyBigramsToDecayingDict(final int formatVersion) {
|
||||
final int maxUnigramCount = 5000;
|
||||
final int maxBigramCount = 10000;
|
||||
final HashMap<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
|
||||
|
||||
final int unigramCount = 5000;
|
||||
final int bigramCount = 30000;
|
||||
final int bigramTypedCount = 100000;
|
||||
final int codePointSetSize = 50;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < unigramCount; ++i) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
words.add(word);
|
||||
}
|
||||
for (int i = 0; i < bigramCount; ++i) {
|
||||
final int word0Index = random.nextInt(words.size());
|
||||
int word1Index = random.nextInt(words.size() - 1);
|
||||
if (word1Index >= word0Index) {
|
||||
word1Index += 1;
|
||||
}
|
||||
final String word0 = words.get(word0Index);
|
||||
final String word1 = words.get(word1Index);
|
||||
final Pair<String, String> bigram = new Pair<>(word0, word1);
|
||||
bigrams.add(bigram);
|
||||
}
|
||||
|
||||
for (int i = 0; i < bigramTypedCount; ++i) {
|
||||
final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
|
||||
onInputWord(binaryDictionary, bigram.first, true /* isValidWord */);
|
||||
onInputWordWithPrevWord(binaryDictionary, bigram.second, true /* isValidWord */,
|
||||
bigram.first);
|
||||
|
||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
final int bigramCountBeforeGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
}
|
||||
final int bigramCountAfterGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
||||
}
|
||||
}
|
||||
forcePassingShortTime(binaryDictionary);
|
||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
|
||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
||||
}
|
||||
|
||||
public void testOverflowBigrams() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testOverflowBigrams(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testOverflowBigrams(final int formatVersion) {
|
||||
final int maxUnigramCount = 5000;
|
||||
final int maxBigramCount = 10000;
|
||||
final HashMap<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
|
||||
|
||||
final int bigramCount = 20000;
|
||||
final int unigramCount = 1000;
|
||||
final int unigramTypedCount = 20;
|
||||
final int eachBigramTypedCount = 2;
|
||||
final int strongBigramTypedCount = 20;
|
||||
final int weakBigramTypedCount = 1;
|
||||
final int codePointSetSize = 50;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
for (int i = 0; i < unigramCount; i++) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
words.add(word);
|
||||
for (int j = 0; j < unigramTypedCount; j++) {
|
||||
onInputWord(binaryDictionary, word, true /* isValidWord */);
|
||||
}
|
||||
}
|
||||
final String strong = "strong";
|
||||
final String weak = "weak";
|
||||
final String target = "target";
|
||||
for (int j = 0; j < unigramTypedCount; j++) {
|
||||
onInputWord(binaryDictionary, weak, true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, strong, true /* isValidWord */);
|
||||
}
|
||||
binaryDictionary.flushWithGC();
|
||||
for (int j = 0; j < strongBigramTypedCount; j++) {
|
||||
onInputWordWithPrevWord(binaryDictionary, target, true /* isValidWord */, strong);
|
||||
}
|
||||
for (int j = 0; j < weakBigramTypedCount; j++) {
|
||||
onInputWordWithPrevWord(binaryDictionary, target, true /* isValidWord */, weak);
|
||||
}
|
||||
assertTrue(isValidBigram(binaryDictionary, strong, target));
|
||||
assertTrue(isValidBigram(binaryDictionary, weak, target));
|
||||
|
||||
for (int i = 0; i < bigramCount; i++) {
|
||||
final int word0Index = random.nextInt(words.size());
|
||||
final String word0 = words.get(word0Index);
|
||||
final int index = random.nextInt(words.size() - 1);
|
||||
final int word1Index = (index >= word0Index) ? index + 1 : index;
|
||||
final String word1 = words.get(word1Index);
|
||||
|
||||
for (int j = 0; j < eachBigramTypedCount; j++) {
|
||||
onInputWordWithPrevWord(binaryDictionary, word1, true /* isValidWord */, word0);
|
||||
}
|
||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
final int bigramCountBeforeGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||
binaryDictionary.flushWithGC();
|
||||
final int bigramCountAfterGC =
|
||||
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
||||
assertTrue(isValidBigram(binaryDictionary, strong, target));
|
||||
assertFalse(isValidBigram(binaryDictionary, weak, target));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testDictMigration() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(fromFormatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
onInputWord(binaryDictionary, "aaa", true /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||
onInputWord(binaryDictionary, "ccc", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "ccc", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "ccc", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "ccc", true /* isValidWord */);
|
||||
onInputWord(binaryDictionary, "ccc", true /* isValidWord */);
|
||||
|
||||
onInputWordWithPrevWord(binaryDictionary, "abc", true /* isValidWord */, "aaa");
|
||||
assertTrue(isValidBigram(binaryDictionary, "aaa", "abc"));
|
||||
onInputWordWithPrevWord(binaryDictionary, "bbb", false /* isValidWord */, "aaa");
|
||||
assertFalse(binaryDictionary.isValidWord("bbb"));
|
||||
assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb"));
|
||||
|
||||
if (supportsNgram(toFormatVersion)) {
|
||||
onInputWordWithPrevWords(binaryDictionary, "xyz", true, "abc", "aaa");
|
||||
assertTrue(isValidTrigram(binaryDictionary, "aaa", "abc", "xyz"));
|
||||
onInputWordWithPrevWords(binaryDictionary, "def", false, "abc", "aaa");
|
||||
assertFalse(isValidTrigram(binaryDictionary, "aaa", "abc", "def"));
|
||||
}
|
||||
|
||||
assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
|
||||
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
|
||||
assertTrue(binaryDictionary.isValidDictionary());
|
||||
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
|
||||
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||
assertFalse(binaryDictionary.isValidWord("bbb"));
|
||||
if (supportsCountBasedNgram(toFormatVersion)) {
|
||||
assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc"));
|
||||
onInputWord(binaryDictionary, "bbb", false /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidWord("bbb"));
|
||||
}
|
||||
assertTrue(isValidBigram(binaryDictionary, "aaa", "abc"));
|
||||
assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb"));
|
||||
if (supportsCountBasedNgram(toFormatVersion)) {
|
||||
onInputWordWithPrevWord(binaryDictionary, "bbb", false /* isValidWord */, "aaa");
|
||||
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
|
||||
}
|
||||
if (supportsNgram(toFormatVersion)) {
|
||||
assertTrue(isValidTrigram(binaryDictionary, "aaa", "abc", "xyz"));
|
||||
assertFalse(isValidTrigram(binaryDictionary, "aaa", "abc", "def"));
|
||||
onInputWordWithPrevWords(binaryDictionary, "def", false, "abc", "aaa");
|
||||
assertTrue(isValidTrigram(binaryDictionary, "aaa", "abc", "def"));
|
||||
}
|
||||
|
||||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
public void testBeginningOfSentence() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testBeginningOfSentence(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testBeginningOfSentence(final int formatVersion) {
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY,
|
||||
true /* isBeginningOfSentence */, true /* isNotAWord */,
|
||||
false /* isPossiblyOffensive */, mCurrentTime);
|
||||
final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
|
||||
assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "bbb", true /* isValidWord */);
|
||||
assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb"));
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "bbb", true /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));
|
||||
assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb"));
|
||||
forcePassingLongTime(binaryDictionary);
|
||||
assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));
|
||||
assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb"));
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "bbb", true /* isValidWord */);
|
||||
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "bbb", true /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));
|
||||
assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb"));
|
||||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
public void testRemoveUnigrams() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testRemoveUnigrams(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRemoveUnigrams(final int formatVersion) {
|
||||
final int unigramInputCount = 20;
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
onInputWord(binaryDictionary, "aaa", false /* isValidWord */);
|
||||
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||
for (int i = 0; i < unigramInputCount; i++) {
|
||||
onInputWord(binaryDictionary, "aaa", false /* isValidWord */);
|
||||
}
|
||||
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||
assertTrue(binaryDictionary.removeUnigramEntry("aaa"));
|
||||
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||
onInputWord(binaryDictionary, "aaa", false /* isValidWord */);
|
||||
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||
onInputWord(binaryDictionary, "aaa", false /* isValidWord */);
|
||||
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||
assertTrue(binaryDictionary.removeUnigramEntry("aaa"));
|
||||
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
public void testUpdateEntriesForInputEvents() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testUpdateEntriesForInputEvents(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testUpdateEntriesForInputEvents(final int formatVersion) {
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final int codePointSetSize = 20;
|
||||
final int EVENT_COUNT = 1000;
|
||||
final double CONTINUE_RATE = 0.9;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
final ArrayList<String> unigrams = new ArrayList<>();
|
||||
final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
|
||||
final ArrayList<Pair<Pair<String, String>, String>> trigrams = new ArrayList<>();
|
||||
|
||||
final WordInputEventForPersonalization[] inputEvents =
|
||||
new WordInputEventForPersonalization[EVENT_COUNT];
|
||||
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
||||
int prevWordCount = 0;
|
||||
for (int i = 0; i < inputEvents.length; i++) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
inputEvents[i] = new WordInputEventForPersonalization(word, ngramContext, mCurrentTime);
|
||||
unigrams.add(word);
|
||||
if (prevWordCount >= 2) {
|
||||
final Pair<String, String> prevWordsPair = bigrams.get(bigrams.size() - 1);
|
||||
trigrams.add(new Pair<>(prevWordsPair, word));
|
||||
}
|
||||
if (prevWordCount >= 1) {
|
||||
bigrams.add(new Pair<>(ngramContext.getNthPrevWord(1 /* n */).toString(), word));
|
||||
}
|
||||
if (random.nextDouble() > CONTINUE_RATE) {
|
||||
ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
||||
prevWordCount = 0;
|
||||
} else {
|
||||
ngramContext = ngramContext.getNextNgramContext(new WordInfo(word));
|
||||
prevWordCount++;
|
||||
}
|
||||
}
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
binaryDictionary.updateEntriesForInputEvents(inputEvents);
|
||||
|
||||
for (final String word : unigrams) {
|
||||
assertTrue(binaryDictionary.isInDictionary(word));
|
||||
}
|
||||
for (final Pair<String, String> bigram : bigrams) {
|
||||
assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
|
||||
}
|
||||
if (!supportsNgram(formatVersion)) {
|
||||
return;
|
||||
}
|
||||
for (final Pair<Pair<String, String>, String> trigram : trigrams) {
|
||||
assertTrue(isValidTrigram(binaryDictionary, trigram.first.first, trigram.first.second,
|
||||
trigram.second));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -21,15 +21,11 @@ import android.test.suitebuilder.annotation.LargeTest;
|
|||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||
import com.android.inputmethod.latin.NgramContext;
|
||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Unit tests for UserHistoryDictionary
|
||||
|
@ -87,20 +83,6 @@ public class UserHistoryDictionaryTests extends AndroidTestCase {
|
|||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
}
|
||||
|
||||
private void forcePassingShortTime() {
|
||||
// 3 days.
|
||||
final int timeToElapse = (int)TimeUnit.DAYS.toSeconds(3);
|
||||
mCurrentTime += timeToElapse;
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
}
|
||||
|
||||
private void forcePassingLongTime() {
|
||||
// 365 days.
|
||||
final int timeToElapse = (int)TimeUnit.DAYS.toSeconds(365);
|
||||
mCurrentTime += timeToElapse;
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
}
|
||||
|
||||
private static int setCurrentTimeForTestMode(final int currentTime) {
|
||||
return BinaryDictionaryUtils.setCurrentTimeForTest(currentTime);
|
||||
}
|
||||
|
@ -228,46 +210,4 @@ public class UserHistoryDictionaryTests extends AndroidTestCase {
|
|||
numberOfWords, random, true /* checksContents */, mCurrentTime));
|
||||
assertDictionaryExists(dict, dictFile);
|
||||
}
|
||||
|
||||
public void testDecaying() {
|
||||
doTestDecaying(TEST_ACCOUNT);
|
||||
}
|
||||
|
||||
public void testDecaying_NullAccount() {
|
||||
doTestDecaying(null /* testAccount */);
|
||||
}
|
||||
|
||||
private void doTestDecaying(final String testAccount) {
|
||||
final Locale dummyLocale = UserHistoryDictionaryTestsHelper.getDummyLocale("decaying");
|
||||
final UserHistoryDictionary dict = PersonalizationHelper.getUserHistoryDictionary(
|
||||
getContext(), dummyLocale, testAccount);
|
||||
resetCurrentTimeForTestMode();
|
||||
clearHistory(dict);
|
||||
dict.waitAllTasksForTests();
|
||||
|
||||
final int numberOfWords = 5000;
|
||||
final Random random = new Random(123456);
|
||||
final List<String> words = UserHistoryDictionaryTestsHelper.generateWords(numberOfWords,
|
||||
random);
|
||||
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
||||
for (final String word : words) {
|
||||
UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, mCurrentTime);
|
||||
ngramContext = ngramContext.getNextNgramContext(new WordInfo(word));
|
||||
dict.waitAllTasksForTests();
|
||||
assertTrue(dict.isInDictionary(word));
|
||||
}
|
||||
forcePassingShortTime();
|
||||
dict.runGCIfRequired();
|
||||
dict.waitAllTasksForTests();
|
||||
for (final String word : words) {
|
||||
assertTrue(dict.isInDictionary(word));
|
||||
}
|
||||
// Long term decay results in words removed from the dictionary.
|
||||
forcePassingLongTime();
|
||||
dict.runGCIfRequired();
|
||||
dict.waitAllTasksForTests();
|
||||
for (final String word : words) {
|
||||
assertFalse(dict.isInDictionary(word));
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue