Merge "Make dumpAllWordsForDebug() use getNextWordProperty()."
This commit is contained in:
commit
99b7242f78
6 changed files with 63 additions and 622 deletions
|
@ -28,6 +28,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
import com.android.inputmethod.latin.utils.FileUtils;
|
import com.android.inputmethod.latin.utils.FileUtils;
|
||||||
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
||||||
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
|
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
|
||||||
|
import com.android.inputmethod.latin.utils.WordProperty;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -778,16 +779,24 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
protected void runAfterGcForDebug(final Runnable r) {
|
public void dumpAllWordsForDebug() {
|
||||||
getExecutor(mDictName).executePrioritized(new Runnable() {
|
reloadDictionaryIfRequired();
|
||||||
|
getExecutor(mDictName).execute(new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
Log.d(TAG, "dictionary=" + mDictName);
|
||||||
mBinaryDictionary.flushWithGC();
|
int token = 0;
|
||||||
r.run();
|
do {
|
||||||
} finally {
|
final BinaryDictionary.GetNextWordPropertyResult result =
|
||||||
mDictNameDictionaryUpdateController.mProcessingLargeTask.set(false);
|
mBinaryDictionary.getNextWordProperty(token);
|
||||||
}
|
final WordProperty wordProperty = result.mWordProperty;
|
||||||
|
if (wordProperty == null) {
|
||||||
|
Log.d(TAG, " dictionary is empty.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Log.d(TAG, wordProperty.toString());
|
||||||
|
token = result.mNextToken;
|
||||||
|
} while (token != 0);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,21 +17,15 @@
|
||||||
package com.android.inputmethod.latin.personalization;
|
package com.android.inputmethod.latin.personalization;
|
||||||
|
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
import android.util.Log;
|
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.Constants;
|
import com.android.inputmethod.latin.Constants;
|
||||||
import com.android.inputmethod.latin.Dictionary;
|
import com.android.inputmethod.latin.Dictionary;
|
||||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.DictDecoder;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
|
||||||
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
||||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
|
|
||||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -44,7 +38,6 @@ import java.util.concurrent.TimeUnit;
|
||||||
*/
|
*/
|
||||||
public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
|
public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
|
||||||
private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
|
private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
|
||||||
public static final boolean DBG_SAVE_RESTORE = false;
|
|
||||||
private static final boolean DBG_DUMP_ON_CLOSE = false;
|
private static final boolean DBG_DUMP_ON_CLOSE = false;
|
||||||
|
|
||||||
/** Any pair being typed or picked */
|
/** Any pair being typed or picked */
|
||||||
|
@ -53,8 +46,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||||
public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
|
public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
|
||||||
public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;
|
public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;
|
||||||
|
|
||||||
public static final int REQUIRED_BINARY_DICTIONARY_VERSION = FormatSpec.VERSION4;
|
|
||||||
|
|
||||||
/** The locale for this dictionary. */
|
/** The locale for this dictionary. */
|
||||||
public final Locale mLocale;
|
public final Locale mLocale;
|
||||||
|
|
||||||
|
@ -160,57 +151,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||||
// Never loaded to memory in Java side.
|
// Never loaded to memory in Java side.
|
||||||
}
|
}
|
||||||
|
|
||||||
@UsedForTesting
|
|
||||||
public void dumpAllWordsForDebug() {
|
|
||||||
runAfterGcForDebug(new Runnable() {
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
dumpAllWordsForDebugLocked();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private void dumpAllWordsForDebugLocked() {
|
|
||||||
Log.d(TAG, "dumpAllWordsForDebug started.");
|
|
||||||
final OnAddWordListener listener = new OnAddWordListener() {
|
|
||||||
@Override
|
|
||||||
public void setUnigram(final String word, final String shortcutTarget,
|
|
||||||
final int frequency, final int shortcutFreq) {
|
|
||||||
Log.d(TAG, "load unigram: " + word + "," + frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setBigram(final String word0, final String word1, final int frequency) {
|
|
||||||
if (word0.length() < Constants.DICTIONARY_MAX_WORD_LENGTH
|
|
||||||
&& word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) {
|
|
||||||
Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency);
|
|
||||||
} else {
|
|
||||||
Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + ","
|
|
||||||
+ frequency);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Load the dictionary from binary file
|
|
||||||
final File dictFile = new File(mContext.getFilesDir(), mDictName);
|
|
||||||
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(dictFile,
|
|
||||||
DictDecoder.USE_BYTEARRAY);
|
|
||||||
if (dictDecoder == null) {
|
|
||||||
// This is an expected condition: we don't have a user history dictionary for this
|
|
||||||
// language yet. It will be created sometime later.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
dictDecoder.openDictBuffer();
|
|
||||||
UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
|
|
||||||
} catch (IOException e) {
|
|
||||||
Log.d(TAG, "IOException on opening a bytebuffer", e);
|
|
||||||
} catch (UnsupportedFormatException e) {
|
|
||||||
Log.d(TAG, "Unsupported format, can't read the dictionary", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public void clearAndFlushDictionary() {
|
public void clearAndFlushDictionary() {
|
||||||
// Clear the node structure on memory
|
// Clear the node structure on memory
|
||||||
|
|
|
@ -1,128 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2012 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.android.inputmethod.latin.personalization;
|
|
||||||
|
|
||||||
import android.util.Log;
|
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A store of bigrams which will be updated when the user history dictionary is closed
|
|
||||||
* All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale
|
|
||||||
* bigrams when we write to the SQL DB.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
public final class UserHistoryDictionaryBigramList {
|
|
||||||
public static final byte FORGETTING_CURVE_INITIAL_VALUE = 0;
|
|
||||||
private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
|
|
||||||
private static final HashMap<String, Byte> EMPTY_BIGRAM_MAP = CollectionUtils.newHashMap();
|
|
||||||
private final HashMap<String, HashMap<String, Byte>> mBigramMap = CollectionUtils.newHashMap();
|
|
||||||
private int mSize = 0;
|
|
||||||
|
|
||||||
public void evictAll() {
|
|
||||||
mSize = 0;
|
|
||||||
mBigramMap.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Called when the user typed a word.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
public void addBigram(String word1, String word2) {
|
|
||||||
addBigram(word1, word2, FORGETTING_CURVE_INITIAL_VALUE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Called when loaded from the SQL DB.
|
|
||||||
*/
|
|
||||||
public void addBigram(String word1, String word2, byte fcValue) {
|
|
||||||
if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
|
|
||||||
Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue);
|
|
||||||
}
|
|
||||||
final HashMap<String, Byte> map;
|
|
||||||
if (mBigramMap.containsKey(word1)) {
|
|
||||||
map = mBigramMap.get(word1);
|
|
||||||
} else {
|
|
||||||
map = CollectionUtils.newHashMap();
|
|
||||||
mBigramMap.put(word1, map);
|
|
||||||
}
|
|
||||||
if (!map.containsKey(word2)) {
|
|
||||||
++mSize;
|
|
||||||
map.put(word2, fcValue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Called when inserted to the SQL DB.
|
|
||||||
*/
|
|
||||||
public void updateBigram(String word1, String word2, byte fcValue) {
|
|
||||||
if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
|
|
||||||
Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue);
|
|
||||||
}
|
|
||||||
final HashMap<String, Byte> map;
|
|
||||||
if (mBigramMap.containsKey(word1)) {
|
|
||||||
map = mBigramMap.get(word1);
|
|
||||||
} else {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!map.containsKey(word2)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
map.put(word2, fcValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return mSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return mBigramMap.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean containsKey(String word) {
|
|
||||||
return mBigramMap.containsKey(word);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<String> keySet() {
|
|
||||||
return mBigramMap.keySet();
|
|
||||||
}
|
|
||||||
|
|
||||||
public HashMap<String, Byte> getBigrams(String word1) {
|
|
||||||
if (mBigramMap.containsKey(word1)) return mBigramMap.get(word1);
|
|
||||||
// TODO: lower case according to locale
|
|
||||||
final String lowerWord1 = word1.toLowerCase();
|
|
||||||
if (mBigramMap.containsKey(lowerWord1)) return mBigramMap.get(lowerWord1);
|
|
||||||
return EMPTY_BIGRAM_MAP;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean removeBigram(String word1, String word2) {
|
|
||||||
final HashMap<String, Byte> set = getBigrams(word1);
|
|
||||||
if (set.isEmpty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (set.containsKey(word2)) {
|
|
||||||
set.remove(word2);
|
|
||||||
--mSize;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,181 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2012 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.android.inputmethod.latin.utils;
|
|
||||||
|
|
||||||
import android.util.Log;
|
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
|
||||||
import com.android.inputmethod.latin.makedict.DictDecoder;
|
|
||||||
import com.android.inputmethod.latin.makedict.DictEncoder;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
|
||||||
import com.android.inputmethod.latin.makedict.PendingAttribute;
|
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
|
||||||
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads and writes Binary files for a UserHistoryDictionary.
|
|
||||||
*
|
|
||||||
* All the methods in this class are static.
|
|
||||||
*/
|
|
||||||
public final class UserHistoryDictIOUtils {
|
|
||||||
private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
|
|
||||||
private static final boolean DEBUG = false;
|
|
||||||
|
|
||||||
public interface OnAddWordListener {
|
|
||||||
/**
|
|
||||||
* Callback to be notified when a word is added to the dictionary.
|
|
||||||
* @param word The added word.
|
|
||||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
|
||||||
* @param frequency The frequency for this word.
|
|
||||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
|
|
||||||
* Unspecified if shortcutTarget is null - do not rely on its value.
|
|
||||||
*/
|
|
||||||
public void setUnigram(final String word, final String shortcutTarget, final int frequency,
|
|
||||||
final int shortcutFreq);
|
|
||||||
public void setBigram(final String word1, final String word2, final int frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
@UsedForTesting
|
|
||||||
public interface BigramDictionaryInterface {
|
|
||||||
public int getFrequency(final String word1, final String word2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes dictionary to file.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
public static void writeDictionary(final DictEncoder dictEncoder,
|
|
||||||
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
|
|
||||||
final FormatOptions formatOptions, final HashMap<String, String> options) {
|
|
||||||
final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options);
|
|
||||||
fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY,
|
|
||||||
String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
|
|
||||||
try {
|
|
||||||
dictEncoder.writeDictionary(fusionDict, formatOptions);
|
|
||||||
Log.d(TAG, "end writing");
|
|
||||||
} catch (IOException e) {
|
|
||||||
Log.e(TAG, "IO exception while writing file", e);
|
|
||||||
} catch (UnsupportedFormatException e) {
|
|
||||||
Log.e(TAG, "Unsupported format", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a new FusionDictionary from BigramDictionaryInterface.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
|
|
||||||
final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
|
|
||||||
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
|
|
||||||
new FusionDictionary.DictionaryOptions(options));
|
|
||||||
int profTotal = 0;
|
|
||||||
for (final String word1 : bigrams.keySet()) {
|
|
||||||
final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
|
|
||||||
for (final String word2 : word1Bigrams.keySet()) {
|
|
||||||
final int freq = dict.getFrequency(word1, word2);
|
|
||||||
if (freq == -1) {
|
|
||||||
// don't add this bigram.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (DEBUG) {
|
|
||||||
if (word1 == null) {
|
|
||||||
Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
|
|
||||||
} else {
|
|
||||||
Log.d(TAG, "add bigram: " + word1
|
|
||||||
+ "," + word2 + "," + Integer.toString(freq));
|
|
||||||
}
|
|
||||||
profTotal++;
|
|
||||||
}
|
|
||||||
if (word1 == null) { // unigram
|
|
||||||
fusionDict.add(word2, freq, null, false /* isNotAWord */);
|
|
||||||
} else { // bigram
|
|
||||||
if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
|
|
||||||
fusionDict.add(word1, 2, null, false /* isNotAWord */);
|
|
||||||
}
|
|
||||||
fusionDict.setBigram(word1, word2, freq);
|
|
||||||
}
|
|
||||||
bigrams.updateBigram(word1, word2, (byte)freq);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (DEBUG) {
|
|
||||||
Log.d(TAG, "add " + profTotal + "words");
|
|
||||||
}
|
|
||||||
return fusionDict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads dictionary from file.
|
|
||||||
*/
|
|
||||||
public static void readDictionaryBinary(final DictDecoder dictDecoder,
|
|
||||||
final OnAddWordListener dict) {
|
|
||||||
final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
|
|
||||||
final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
|
|
||||||
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
|
|
||||||
try {
|
|
||||||
dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
|
|
||||||
} catch (IOException e) {
|
|
||||||
Log.e(TAG, "IO exception while reading file", e);
|
|
||||||
} catch (UnsupportedFormatException e) {
|
|
||||||
Log.e(TAG, "Unsupported format", e);
|
|
||||||
} catch (ArrayIndexOutOfBoundsException e) {
|
|
||||||
Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
|
|
||||||
}
|
|
||||||
addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds all unigrams and bigrams in maps to OnAddWordListener.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
|
|
||||||
final TreeMap<Integer, Integer> frequencies,
|
|
||||||
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
|
|
||||||
final OnAddWordListener to) {
|
|
||||||
for (Entry<Integer, String> entry : unigrams.entrySet()) {
|
|
||||||
final String word1 = entry.getValue();
|
|
||||||
final int unigramFrequency = frequencies.get(entry.getKey());
|
|
||||||
to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
|
|
||||||
final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
|
|
||||||
if (attrList != null) {
|
|
||||||
for (final PendingAttribute attr : attrList) {
|
|
||||||
final String word2 = unigrams.get(attr.mAddress);
|
|
||||||
if (word1 == null || word2 == null) {
|
|
||||||
Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
to.setBigram(word1, word2,
|
|
||||||
BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
|
|
||||||
attr.mFrequency));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -41,7 +41,7 @@ public class WordProperty {
|
||||||
// package.
|
// package.
|
||||||
public static final class ProbabilityInfo {
|
public static final class ProbabilityInfo {
|
||||||
public final int mProbability;
|
public final int mProbability;
|
||||||
// wTimestamp, mLevel and mCount are historical info. These values are depend on the
|
// mTimestamp, mLevel and mCount are historical info. These values depend on the
|
||||||
// implementation in native code; thus, we must not use them and have any assumptions about
|
// implementation in native code; thus, we must not use them or make any assumptions about
|
||||||
// them except for tests.
|
// them except for tests.
|
||||||
public final int mTimestamp;
|
public final int mTimestamp;
|
||||||
|
@ -54,6 +54,11 @@ public class WordProperty {
|
||||||
mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
|
mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
|
||||||
mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
|
mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return mTimestamp + ":" + mLevel + ":" + mCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getCodePointCount(final int[] codePoints) {
|
private static int getCodePointCount(final int[] codePoints) {
|
||||||
|
@ -105,4 +110,44 @@ public class WordProperty {
|
||||||
public boolean isValid() {
|
public boolean isValid() {
|
||||||
return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
|
return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
// TODO: Move this logic to CombinedInputOutput.
|
||||||
|
final StringBuffer builder = new StringBuffer();
|
||||||
|
builder.append(" word=" + mCodePoints);
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("f=" + mProbabilityInfo.mProbability);
|
||||||
|
if (mIsNotAWord) {
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("not_a_word=true");
|
||||||
|
}
|
||||||
|
if (mIsBlacklisted) {
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("blacklisted=true");
|
||||||
|
}
|
||||||
|
if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("historicalInfo=" + mProbabilityInfo);
|
||||||
|
}
|
||||||
|
builder.append("\n");
|
||||||
|
for (int i = 0; i < mBigramTargets.size(); i++) {
|
||||||
|
builder.append(" bigram=" + mBigramTargets.get(i).mWord);
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("f=" + mBigramTargets.get(i).mFrequency);
|
||||||
|
if (mBigramProbabilityInfo.get(i).mTimestamp
|
||||||
|
!= BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i));
|
||||||
|
}
|
||||||
|
builder.append("\n");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < mShortcutTargets.size(); i++) {
|
||||||
|
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("f=" + mShortcutTargets.get(i).mFrequency);
|
||||||
|
builder.append("\n");
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -1,244 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2012 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.android.inputmethod.latin.utils;
|
|
||||||
|
|
||||||
import android.content.Context;
|
|
||||||
import android.test.AndroidTestCase;
|
|
||||||
import android.test.suitebuilder.annotation.LargeTest;
|
|
||||||
import android.util.Log;
|
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.DictDecoder;
|
|
||||||
import com.android.inputmethod.latin.makedict.DictEncoder;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
|
||||||
import com.android.inputmethod.latin.makedict.Ver2DictDecoder;
|
|
||||||
import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
|
|
||||||
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
|
|
||||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface;
|
|
||||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Unit tests for UserHistoryDictIOUtils
|
|
||||||
*/
|
|
||||||
@LargeTest
|
|
||||||
public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|
||||||
implements BigramDictionaryInterface {
|
|
||||||
|
|
||||||
private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName();
|
|
||||||
private static final int UNIGRAM_FREQUENCY = 50;
|
|
||||||
private static final int BIGRAM_FREQUENCY = 100;
|
|
||||||
private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();
|
|
||||||
private static final FormatSpec.FormatOptions FORMAT_OPTIONS = new FormatSpec.FormatOptions(2);
|
|
||||||
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
|
||||||
private static final HashMap<String, String> HEADER_OPTIONS = new HashMap<String, String>();
|
|
||||||
static {
|
|
||||||
HEADER_OPTIONS.put(FileHeader.DICTIONARY_LOCALE_KEY, "en_US");
|
|
||||||
HEADER_OPTIONS.put(FileHeader.DICTIONARY_ID_KEY, "test");
|
|
||||||
HEADER_OPTIONS.put(FileHeader.DICTIONARY_VERSION_KEY, "1000");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return same frequency for all words and bigrams
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getFrequency(String word1, String word2) {
|
|
||||||
if (word1 == null) return UNIGRAM_FREQUENCY;
|
|
||||||
return BIGRAM_FREQUENCY;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Utilities for Testing
|
|
||||||
|
|
||||||
private void addWord(final String word,
|
|
||||||
final HashMap<String, ArrayList<String> > addedWords) {
|
|
||||||
if (!addedWords.containsKey(word)) {
|
|
||||||
addedWords.put(word, new ArrayList<String>());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addBigram(final String word1, final String word2,
|
|
||||||
final HashMap<String, ArrayList<String> > addedWords) {
|
|
||||||
addWord(word1, addedWords);
|
|
||||||
addWord(word2, addedWords);
|
|
||||||
addedWords.get(word1).add(word2);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addBigramToBigramList(final String word1, final String word2,
|
|
||||||
final HashMap<String, ArrayList<String> > addedWords,
|
|
||||||
final UserHistoryDictionaryBigramList bigramList) {
|
|
||||||
bigramList.addBigram(null, word1);
|
|
||||||
bigramList.addBigram(word1, word2);
|
|
||||||
|
|
||||||
addBigram(word1, word2, addedWords);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkWordInFusionDict(final FusionDictionary dict, final String word,
|
|
||||||
final ArrayList<String> expectedBigrams) {
|
|
||||||
final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
|
||||||
assertNotNull(ptNode);
|
|
||||||
assertTrue(ptNode.isTerminal());
|
|
||||||
|
|
||||||
for (final String bigram : expectedBigrams) {
|
|
||||||
assertNotNull(ptNode.getBigram(bigram));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkWordsInFusionDict(final FusionDictionary dict,
|
|
||||||
final HashMap<String, ArrayList<String> > bigrams) {
|
|
||||||
for (final String word : bigrams.keySet()) {
|
|
||||||
if (bigrams.containsKey(word)) {
|
|
||||||
checkWordInFusionDict(dict, word, bigrams.get(word));
|
|
||||||
} else {
|
|
||||||
checkWordInFusionDict(dict, word, NOT_HAVE_BIGRAM);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkWordInBigramList(
|
|
||||||
final UserHistoryDictionaryBigramList bigramList, final String word,
|
|
||||||
final ArrayList<String> expectedBigrams) {
|
|
||||||
// check unigram
|
|
||||||
final HashMap<String,Byte> unigramMap = bigramList.getBigrams(null);
|
|
||||||
assertTrue(unigramMap.containsKey(word));
|
|
||||||
|
|
||||||
// check bigrams
|
|
||||||
final ArrayList<String> actualBigrams = new ArrayList<String>(
|
|
||||||
bigramList.getBigrams(word).keySet());
|
|
||||||
|
|
||||||
Collections.sort(expectedBigrams);
|
|
||||||
Collections.sort(actualBigrams);
|
|
||||||
assertEquals(expectedBigrams, actualBigrams);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList,
|
|
||||||
final HashMap<String, ArrayList<String> > addedWords) {
|
|
||||||
for (final String word : addedWords.keySet()) {
|
|
||||||
if (addedWords.containsKey(word)) {
|
|
||||||
checkWordInBigramList(bigramList, word, addedWords.get(word));
|
|
||||||
} else {
|
|
||||||
checkWordInBigramList(bigramList, word, NOT_HAVE_BIGRAM);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void writeDictToFile(final File file,
|
|
||||||
final UserHistoryDictionaryBigramList bigramList) {
|
|
||||||
final DictEncoder dictEncoder = new Ver2DictEncoder(file);
|
|
||||||
UserHistoryDictIOUtils.writeDictionary(dictEncoder, this, bigramList, FORMAT_OPTIONS,
|
|
||||||
HEADER_OPTIONS);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void readDictFromFile(final File file, final OnAddWordListener listener)
|
|
||||||
throws IOException, FileNotFoundException, UnsupportedFormatException {
|
|
||||||
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY);
|
|
||||||
dictDecoder.openDictBuffer();
|
|
||||||
UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testGenerateFusionDictionary() {
|
|
||||||
final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();
|
|
||||||
|
|
||||||
final HashMap<String, ArrayList<String> > addedWords =
|
|
||||||
new HashMap<String, ArrayList<String>>();
|
|
||||||
addBigramToBigramList("this", "is", addedWords, originalList);
|
|
||||||
addBigramToBigramList("this", "was", addedWords, originalList);
|
|
||||||
addBigramToBigramList("hello", "world", addedWords, originalList);
|
|
||||||
|
|
||||||
final FusionDictionary fusionDict = UserHistoryDictIOUtils.constructFusionDictionary(
|
|
||||||
this, originalList, HEADER_OPTIONS);
|
|
||||||
|
|
||||||
checkWordsInFusionDict(fusionDict, addedWords);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testReadAndWrite() throws IOException, FileNotFoundException,
|
|
||||||
UnsupportedFormatException {
|
|
||||||
final Context context = getContext();
|
|
||||||
|
|
||||||
File file = null;
|
|
||||||
try {
|
|
||||||
file = File.createTempFile("testReadAndWrite", TEST_DICT_FILE_EXTENSION,
|
|
||||||
getContext().getCacheDir());
|
|
||||||
} catch (IOException e) {
|
|
||||||
Log.d(TAG, "IOException while creating a temporary file", e);
|
|
||||||
}
|
|
||||||
assertNotNull(file);
|
|
||||||
|
|
||||||
// make original dictionary
|
|
||||||
final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();
|
|
||||||
final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap();
|
|
||||||
addBigramToBigramList("this" , "is" , addedWords, originalList);
|
|
||||||
addBigramToBigramList("this" , "was" , addedWords, originalList);
|
|
||||||
addBigramToBigramList("is" , "not" , addedWords, originalList);
|
|
||||||
addBigramToBigramList("hello", "world", addedWords, originalList);
|
|
||||||
|
|
||||||
// write to file
|
|
||||||
writeDictToFile(file, originalList);
|
|
||||||
|
|
||||||
// make result dict.
|
|
||||||
final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
|
|
||||||
final OnAddWordListener listener = new OnAddWordListener() {
|
|
||||||
@Override
|
|
||||||
public void setUnigram(final String word, final String shortcutTarget,
|
|
||||||
final int frequency, final int shortcutFreq) {
|
|
||||||
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
|
|
||||||
resultList.addBigram(null, word, (byte)frequency);
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void setBigram(final String word1, final String word2, final int frequency) {
|
|
||||||
Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
|
|
||||||
resultList.addBigram(word1, word2, (byte)frequency);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// load from file
|
|
||||||
readDictFromFile(file, listener);
|
|
||||||
checkWordsInBigramList(resultList, addedWords);
|
|
||||||
|
|
||||||
// add new bigram
|
|
||||||
addBigramToBigramList("hello", "java", addedWords, resultList);
|
|
||||||
|
|
||||||
// rewrite
|
|
||||||
writeDictToFile(file, resultList);
|
|
||||||
final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
|
|
||||||
final OnAddWordListener listener2 = new OnAddWordListener() {
|
|
||||||
@Override
|
|
||||||
public void setUnigram(final String word, final String shortcutTarget,
|
|
||||||
final int frequency, final int shortcutFreq) {
|
|
||||||
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
|
|
||||||
resultList2.addBigram(null, word, (byte)frequency);
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void setBigram(final String word1, final String word2, final int frequency) {
|
|
||||||
Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
|
|
||||||
resultList2.addBigram(word1, word2, (byte)frequency);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// load from file
|
|
||||||
readDictFromFile(file, listener2);
|
|
||||||
checkWordsInBigramList(resultList2, addedWords);
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in a new issue