Merge "Set the shortcut frequency correctly."

main
Jean Chalard 2013-10-07 10:00:34 +00:00 committed by Android (Google) Code Review
commit a6047aae94
11 changed files with 92 additions and 31 deletions

View File

@ -41,8 +41,17 @@ abstract public class AbstractDictionaryWriter extends Dictionary {
abstract public void clear();
/**
* Add a unigram with an optional shortcut to the dictionary.
* <p>
* Diff listing note: the opening line of the declaration is followed by two alternative
* parameter-list tails. The first is the pre-change form (no shortcutFreq); the second is
* the post-change form that adds shortcutFreq.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
abstract public void addUnigramWord(final String word, final String shortcutTarget,
final int frequency, final boolean isNotAWord);
final int frequency, final int shortcutFreq, final boolean isNotAWord);
// TODO: Remove lastModifiedTime after making binary dictionary support forgetting curve.
abstract public void addBigramWords(final String word0, final String word1,

View File

@ -127,7 +127,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
if (DEBUG) {
Log.d(TAG, "loadAccountVocabulary: " + word);
}
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, 0 /* shortcutFreq */,
false /* isNotAWord */);
}
}
@ -213,7 +213,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord);
}
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
false /* isNotAWord */);
0 /* shortcutFreq */, false /* isNotAWord */);
if (!TextUtils.isEmpty(prevWord)) {
if (mUseFirstLastBigrams) {
super.addBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM,

View File

@ -62,13 +62,13 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
// considering performance regression.
/**
* Adds a unigram to the fusion dictionary, attaching a single shortcut target when one is
* given.
* <p>
* Diff listing note: the signature tail and the WeightedString construction each appear
* twice, first in the pre-change form and then in the post-change form that carries
* shortcutFreq.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
@Override
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
final boolean isNotAWord) {
final int shortcutFreq, final boolean isNotAWord) {
if (shortcutTarget == null) {
// No shortcut: add the bare unigram with a null shortcut list.
mFusionDictionary.add(word, frequency, null, isNotAWord);
} else {
// TODO: Do this in the subclass, with this class taking an arraylist.
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
// Pre-change form: the shortcut was weighted with the unigram frequency.
shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
// Post-change form: the shortcut is weighted with its own frequency.
shortcutTargets.add(new WeightedString(shortcutTarget, shortcutFreq));
mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord);
}
}

View File

@ -261,10 +261,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
/**
* Adds a word unigram to the dictionary. Used for loading a dictionary.
* <p>
* This method only forwards its arguments to mDictionaryWriter.addUnigramWord.
* <p>
* Diff listing note: the signature tail and the forwarding call each appear twice, first in
* the pre-change form and then in the post-change form that passes shortcutFreq through.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
protected void addWord(final String word, final String shortcutTarget,
final int frequency, final boolean isNotAWord) {
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord);
final int frequency, final int shortcutFreq, final boolean isNotAWord) {
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, isNotAWord);
}
/**
@ -313,7 +319,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* Dynamically adds a word unigram to the dictionary. May overwrite an existing entry.
*/
protected void addWordDynamically(final String word, final String shortcutTarget,
final int frequency, final boolean isNotAWord) {
final int frequency, final int shortcutFreq, final boolean isNotAWord) {
if (!mIsUpdatable) {
Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename);
return;
@ -326,7 +332,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
mBinaryDictionary.addUnigramWord(word, frequency);
} else {
// TODO: Remove.
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord);
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq,
isNotAWord);
}
}
});

View File

@ -156,15 +156,36 @@ public class ExpandableDictionary extends Dictionary {
return Constants.DICTIONARY_MAX_WORD_LENGTH;
}
// Diff listing note: the line below is the pre-change signature (three parameters). It is
// superseded by the documented four-parameter signature that follows.
public void addWord(final String word, final String shortcutTarget, final int frequency) {
/**
* Add a word with an optional shortcut to the dictionary.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
*/
public void addWord(final String word, final String shortcutTarget, final int frequency,
final int shortcutFreq) {
// Words whose length reaches the dictionary maximum are dropped without any error.
if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
return;
}
// Start the recursive insertion at the roots, depth 0, with no parent node.
// Pre-change call (no shortcutFreq):
addWordRec(mRoots, word, 0, shortcutTarget, frequency, null);
// Post-change call (shortcutFreq passed through):
addWordRec(mRoots, word, 0, shortcutTarget, frequency, shortcutFreq, null);
}
/**
* Add a word, recursively searching for its correct place in the trie tree.
* @param children The node to recursively search for addition. Initially, the root of the tree.
* @param word The word to add.
* @param depth The current depth in the tree.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param parentNode The parent node, for up linking. Initially null, as the root has no parent.
*/
private void addWordRec(final NodeArray children, final String word, final int depth,
final String shortcutTarget, final int frequency, final Node parentNode) {
final String shortcutTarget, final int frequency, final int shortcutFreq,
final Node parentNode) {
final int wordLength = word.length();
if (wordLength <= depth) return;
final char c = word.charAt(depth);
@ -204,7 +225,8 @@ public class ExpandableDictionary extends Dictionary {
if (childNode.mChildren == null) {
childNode.mChildren = new NodeArray();
}
addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode);
addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, shortcutFreq,
childNode);
}
@Override

View File

@ -47,6 +47,9 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
private static final String USER_DICTIONARY_ALL_LANGUAGES = "";
private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250;
private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160;
// Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries
// to auto-correct, so we set this to the highest frequency that won't, i.e. 14.
private static final int USER_DICT_SHORTCUT_FREQUENCY = 14;
// TODO: use Words.SHORTCUT when we target JellyBean or above
final static String SHORTCUT = "shortcut";
@ -243,10 +246,12 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency);
// Safeguard against adding really long words.
if (word.length() < MAX_WORD_LENGTH) {
super.addWord(word, null, adjustedFrequency, false /* isNotAWord */);
super.addWord(word, null, adjustedFrequency, 0 /* shortcutFreq */,
false /* isNotAWord */);
}
if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) {
super.addWord(shortcut, word, adjustedFrequency, true /* isNotAWord */);
super.addWord(shortcut, word, adjustedFrequency, USER_DICT_SHORTCUT_FREQUENCY,
true /* isNotAWord */);
}
cursor.moveToNext();
}

View File

@ -138,7 +138,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
(isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
FREQUENCY_FOR_TYPED;
addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */,
false /* isNotAWord */);
// Do not insert a word as a bigram of itself
if (word1.equals(word0)) {
@ -171,11 +171,11 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
final OnAddWordListener listener = new OnAddWordListener() {
// Listener callback: loads one unigram into this dictionary through addWord, always with
// isNotAWord == false, then increments profTotalCount[0].
// The debug log prints only the word and its frequency, not the shortcut data.
// Diff listing note: the signature tail and the addWord call each appear twice, first in the
// pre-change form and then in the post-change form that carries shortcutFreq.
@Override
public void setUnigram(final String word, final String shortcutTarget,
final int frequency) {
final int frequency, final int shortcutFreq) {
if (DBG_SAVE_RESTORE) {
Log.d(TAG, "load unigram: " + word + "," + frequency);
}
addWord(word, shortcutTarget, frequency, false /* isNotAWord */);
addWord(word, shortcutTarget, frequency, shortcutFreq, false /* isNotAWord */);
++profTotalCount[0];
}

View File

@ -75,15 +75,21 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr
/**
* Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes
* are done to update the binary dictionary.
* <p>
* Diff listing note: the signature tail and the mExpandableDictionary.addWord call each appear
* twice, first in the pre-change form and then in the post-change form that carries
* shortcutFreq.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
@Override
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
final boolean isNotAWord) {
final int shortcutFreq, final boolean isNotAWord) {
// Cap on growth: once the bigram list exceeds twice mMaxHistoryBigrams, new words are
// silently ignored.
if (mBigramList.size() > mMaxHistoryBigrams * 2) {
// Too many entries: just stop adding new vocabulary and wait next refresh.
return;
}
mExpandableDictionary.addWord(word, shortcutTarget, frequency);
mExpandableDictionary.addWord(word, shortcutTarget, frequency, shortcutFreq);
// The word is also recorded in the bigram list with a null first word; the frequency is
// narrowed to a byte by the cast. NOTE(review): isNotAWord is not used in the visible body.
mBigramList.addBigram(null, word, (byte)frequency);
}

View File

@ -49,7 +49,16 @@ public final class UserHistoryDictIOUtils {
private static final String LAST_UPDATED_TIME_KEY = "date";
/**
* Callbacks through which unigrams and bigrams are handed to a receiver, one entry per call.
* <p>
* Diff listing note: the first setUnigram declaration below is the pre-change, three-parameter
* form; it is superseded by the documented four-parameter form that follows.
*/
public interface OnAddWordListener {
public void setUnigram(final String word, final String shortcutTarget, final int frequency);
/**
* Callback to be notified when a word is added to the dictionary.
* @param word The added word.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this word.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
* Unspecified if shortcutTarget is null - do not rely on its value.
*/
public void setUnigram(final String word, final String shortcutTarget, final int frequency,
final int shortcutFreq);
// Bigram counterpart of setUnigram. NOTE(review): presumably word1 precedes word2 in the
// bigram - confirm against the implementations.
public void setBigram(final String word1, final String word2, final int frequency);
}
@ -153,7 +162,7 @@ public final class UserHistoryDictIOUtils {
for (Entry<Integer, String> entry : unigrams.entrySet()) {
final String word1 = entry.getValue();
final int unigramFrequency = frequencies.get(entry.getKey());
to.setUnigram(word1, null, unigramFrequency);
to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
if (attrList != null) {
for (final PendingAttribute attr : attrList) {

View File

@ -26,13 +26,16 @@ import android.test.suitebuilder.annotation.SmallTest;
public class ExpandableDictionaryTests extends AndroidTestCase {
private final static int UNIGRAM_FREQ = 50;
// See UserBinaryDictionary for more information about this variable.
// For tests, its actual value does not matter.
private final static int SHORTCUT_FREQ = 14;
public void testAddWordAndGetWordFrequency() {
final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER);
// Add words
dict.addWord("abcde", "abcde", UNIGRAM_FREQ);
dict.addWord("abcef", null, UNIGRAM_FREQ + 1);
dict.addWord("abcde", "abcde", UNIGRAM_FREQ, SHORTCUT_FREQ);
dict.addWord("abcef", null, UNIGRAM_FREQ + 1, 0);
// Check words
assertFalse(dict.isValidWord("abcde"));
@ -40,16 +43,16 @@ public class ExpandableDictionaryTests extends AndroidTestCase {
assertTrue(dict.isValidWord("abcef"));
assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef"));
dict.addWord("abc", null, UNIGRAM_FREQ + 2);
dict.addWord("abc", null, UNIGRAM_FREQ + 2, 0);
assertTrue(dict.isValidWord("abc"));
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
// Add existing word with lower frequency
dict.addWord("abc", null, UNIGRAM_FREQ);
dict.addWord("abc", null, UNIGRAM_FREQ, 0);
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
// Add existing word with higher frequency
dict.addWord("abc", null, UNIGRAM_FREQ + 3);
dict.addWord("abc", null, UNIGRAM_FREQ + 3, 0);
assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc"));
}
}

View File

@ -196,8 +196,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
final OnAddWordListener listener = new OnAddWordListener() {
// Test listener: logs each unigram and records it in resultList as a bigram entry with a
// null first word, with the frequency narrowed to a byte. shortcutTarget and shortcutFreq
// are not used.
// Diff listing note: the signature appears twice, first in the pre-change three-parameter
// form and then in the post-change four-parameter form.
@Override
public void setUnigram(final String word,
final String shortcutTarget, final int frequency) {
public void setUnigram(final String word, final String shortcutTarget,
final int frequency, final int shortcutFreq) {
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
resultList.addBigram(null, word, (byte)frequency);
}
@ -220,8 +220,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
final OnAddWordListener listener2 = new OnAddWordListener() {
// Test listener: logs each unigram and records it in resultList2 as a bigram entry with a
// null first word, with the frequency narrowed to a byte. shortcutTarget and shortcutFreq
// are not used.
// Diff listing note: the signature appears twice, first in the pre-change three-parameter
// form and then in the post-change four-parameter form.
@Override
public void setUnigram(final String word,
final String shortcutTarget, final int frequency) {
public void setUnigram(final String word, final String shortcutTarget,
final int frequency, final int shortcutFreq) {
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
resultList2.addBigram(null, word, (byte)frequency);
}