* commit 'e6b018aa5c9f58dcc9e6acea8b80171f8f807887': Set the shortcut frequency correctly.
This commit is contained in:
commit
05aaedd024
11 changed files with 92 additions and 31 deletions
|
@ -41,8 +41,17 @@ abstract public class AbstractDictionaryWriter extends Dictionary {
|
|||
|
||||
abstract public void clear();
|
||||
|
||||
/**
|
||||
* Add a unigram with an optional shortcut to the dictionary.
|
||||
* @param word The word to add.
|
||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
||||
* @param frequency The frequency for this unigram.
|
||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
|
||||
* if shortcutTarget is null.
|
||||
* @param isNotAWord true if this is not a word, i.e. shortcut only.
|
||||
*/
|
||||
abstract public void addUnigramWord(final String word, final String shortcutTarget,
|
||||
final int frequency, final boolean isNotAWord);
|
||||
final int frequency, final int shortcutFreq, final boolean isNotAWord);
|
||||
|
||||
// TODO: Remove lastModifiedTime once the binary dictionary supports the forgetting curve.
|
||||
abstract public void addBigramWords(final String word0, final String word1,
|
||||
|
|
|
@ -127,7 +127,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
if (DEBUG) {
|
||||
Log.d(TAG, "loadAccountVocabulary: " + word);
|
||||
}
|
||||
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
|
||||
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, 0 /* shortcutFreq */,
|
||||
false /* isNotAWord */);
|
||||
}
|
||||
}
|
||||
|
@ -213,7 +213,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord);
|
||||
}
|
||||
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
|
||||
false /* isNotAWord */);
|
||||
0 /* shortcutFreq */, false /* isNotAWord */);
|
||||
if (!TextUtils.isEmpty(prevWord)) {
|
||||
if (mUseFirstLastBigrams) {
|
||||
super.addBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM,
|
||||
|
|
|
@ -62,13 +62,13 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
|
|||
// considering performance regression.
|
||||
@Override
|
||||
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
|
||||
final boolean isNotAWord) {
|
||||
final int shortcutFreq, final boolean isNotAWord) {
|
||||
if (shortcutTarget == null) {
|
||||
mFusionDictionary.add(word, frequency, null, isNotAWord);
|
||||
} else {
|
||||
// TODO: Do this in the subclass, with this class taking an arraylist.
|
||||
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
|
||||
shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
|
||||
shortcutTargets.add(new WeightedString(shortcutTarget, shortcutFreq));
|
||||
mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -261,10 +261,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
|
||||
/**
|
||||
* Adds a word unigram to the dictionary. Used for loading a dictionary.
|
||||
* @param word The word to add.
|
||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
||||
* @param frequency The frequency for this unigram.
|
||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
|
||||
* if shortcutTarget is null.
|
||||
* @param isNotAWord true if this is not a word, i.e. shortcut only.
|
||||
*/
|
||||
protected void addWord(final String word, final String shortcutTarget,
|
||||
final int frequency, final boolean isNotAWord) {
|
||||
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord);
|
||||
final int frequency, final int shortcutFreq, final boolean isNotAWord) {
|
||||
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, isNotAWord);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -313,7 +319,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
* Dynamically adds a word unigram to the dictionary. May overwrite an existing entry.
|
||||
*/
|
||||
protected void addWordDynamically(final String word, final String shortcutTarget,
|
||||
final int frequency, final boolean isNotAWord) {
|
||||
final int frequency, final int shortcutFreq, final boolean isNotAWord) {
|
||||
if (!mIsUpdatable) {
|
||||
Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename);
|
||||
return;
|
||||
|
@ -326,7 +332,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
mBinaryDictionary.addUnigramWord(word, frequency);
|
||||
} else {
|
||||
// TODO: Remove.
|
||||
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord);
|
||||
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq,
|
||||
isNotAWord);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
|
@ -156,15 +156,36 @@ public class ExpandableDictionary extends Dictionary {
|
|||
return Constants.DICTIONARY_MAX_WORD_LENGTH;
|
||||
}
|
||||
|
||||
public void addWord(final String word, final String shortcutTarget, final int frequency) {
|
||||
/**
|
||||
* Add a word with an optional shortcut to the dictionary.
|
||||
* @param word The word to add.
|
||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
||||
* @param frequency The frequency for this unigram.
|
||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
|
||||
* if shortcutTarget is null.
|
||||
*/
|
||||
public void addWord(final String word, final String shortcutTarget, final int frequency,
|
||||
final int shortcutFreq) {
|
||||
if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
|
||||
return;
|
||||
}
|
||||
addWordRec(mRoots, word, 0, shortcutTarget, frequency, null);
|
||||
addWordRec(mRoots, word, 0, shortcutTarget, frequency, shortcutFreq, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a word, recursively searching for its correct place in the trie tree.
|
||||
* @param children The node to recursively search for addition. Initially, the root of the tree.
|
||||
* @param word The word to add.
|
||||
* @param depth The current depth in the tree.
|
||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
||||
* @param frequency The frequency for this unigram.
|
||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
|
||||
* if shortcutTarget is null.
|
||||
* @param parentNode The parent node, for up linking. Initially null, as the root has no parent.
|
||||
*/
|
||||
private void addWordRec(final NodeArray children, final String word, final int depth,
|
||||
final String shortcutTarget, final int frequency, final Node parentNode) {
|
||||
final String shortcutTarget, final int frequency, final int shortcutFreq,
|
||||
final Node parentNode) {
|
||||
final int wordLength = word.length();
|
||||
if (wordLength <= depth) return;
|
||||
final char c = word.charAt(depth);
|
||||
|
@ -204,7 +225,8 @@ public class ExpandableDictionary extends Dictionary {
|
|||
if (childNode.mChildren == null) {
|
||||
childNode.mChildren = new NodeArray();
|
||||
}
|
||||
addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode);
|
||||
addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, shortcutFreq,
|
||||
childNode);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -47,6 +47,9 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
private static final String USER_DICTIONARY_ALL_LANGUAGES = "";
|
||||
private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250;
|
||||
private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160;
|
||||
// Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries
|
||||
// to auto-correct, so we set this to the highest frequency that won't, i.e. 14.
|
||||
private static final int USER_DICT_SHORTCUT_FREQUENCY = 14;
|
||||
|
||||
// TODO: use Words.SHORTCUT when we target JellyBean or above
|
||||
final static String SHORTCUT = "shortcut";
|
||||
|
@ -243,10 +246,12 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency);
|
||||
// Safeguard against adding really long words.
|
||||
if (word.length() < MAX_WORD_LENGTH) {
|
||||
super.addWord(word, null, adjustedFrequency, false /* isNotAWord */);
|
||||
super.addWord(word, null, adjustedFrequency, 0 /* shortcutFreq */,
|
||||
false /* isNotAWord */);
|
||||
}
|
||||
if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) {
|
||||
super.addWord(shortcut, word, adjustedFrequency, true /* isNotAWord */);
|
||||
super.addWord(shortcut, word, adjustedFrequency, USER_DICT_SHORTCUT_FREQUENCY,
|
||||
true /* isNotAWord */);
|
||||
}
|
||||
cursor.moveToNext();
|
||||
}
|
||||
|
|
|
@ -138,7 +138,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
|||
final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
|
||||
(isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
|
||||
FREQUENCY_FOR_TYPED;
|
||||
addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
|
||||
addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */,
|
||||
false /* isNotAWord */);
|
||||
// Do not insert a word as a bigram of itself
|
||||
if (word1.equals(word0)) {
|
||||
|
@ -171,11 +171,11 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
|||
final OnAddWordListener listener = new OnAddWordListener() {
|
||||
@Override
|
||||
public void setUnigram(final String word, final String shortcutTarget,
|
||||
final int frequency) {
|
||||
final int frequency, final int shortcutFreq) {
|
||||
if (DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "load unigram: " + word + "," + frequency);
|
||||
}
|
||||
addWord(word, shortcutTarget, frequency, false /* isNotAWord */);
|
||||
addWord(word, shortcutTarget, frequency, shortcutFreq, false /* isNotAWord */);
|
||||
++profTotalCount[0];
|
||||
}
|
||||
|
||||
|
|
|
@ -75,15 +75,21 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr
|
|||
/**
|
||||
* Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes
|
||||
* are done to update the binary dictionary.
|
||||
* @param word The word to add.
|
||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
||||
* @param frequency The frequency for this unigram.
|
||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
|
||||
* if shortcutTarget is null.
|
||||
* @param isNotAWord true if this is not a word, i.e. shortcut only.
|
||||
*/
|
||||
@Override
|
||||
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
|
||||
final boolean isNotAWord) {
|
||||
final int shortcutFreq, final boolean isNotAWord) {
|
||||
if (mBigramList.size() > mMaxHistoryBigrams * 2) {
|
||||
// Too many entries: just stop adding new vocabulary and wait for the next refresh.
|
||||
return;
|
||||
}
|
||||
mExpandableDictionary.addWord(word, shortcutTarget, frequency);
|
||||
mExpandableDictionary.addWord(word, shortcutTarget, frequency, shortcutFreq);
|
||||
mBigramList.addBigram(null, word, (byte)frequency);
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,16 @@ public final class UserHistoryDictIOUtils {
|
|||
private static final String LAST_UPDATED_TIME_KEY = "date";
|
||||
|
||||
public interface OnAddWordListener {
|
||||
public void setUnigram(final String word, final String shortcutTarget, final int frequency);
|
||||
/**
|
||||
* Callback to be notified when a word is added to the dictionary.
|
||||
* @param word The added word.
|
||||
* @param shortcutTarget A shortcut target for this word, or null if none.
|
||||
* @param frequency The frequency for this word.
|
||||
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
|
||||
* Unspecified if shortcutTarget is null - do not rely on its value.
|
||||
*/
|
||||
public void setUnigram(final String word, final String shortcutTarget, final int frequency,
|
||||
final int shortcutFreq);
|
||||
public void setBigram(final String word1, final String word2, final int frequency);
|
||||
}
|
||||
|
||||
|
@ -153,7 +162,7 @@ public final class UserHistoryDictIOUtils {
|
|||
for (Entry<Integer, String> entry : unigrams.entrySet()) {
|
||||
final String word1 = entry.getValue();
|
||||
final int unigramFrequency = frequencies.get(entry.getKey());
|
||||
to.setUnigram(word1, null, unigramFrequency);
|
||||
to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
|
||||
final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
|
||||
if (attrList != null) {
|
||||
for (final PendingAttribute attr : attrList) {
|
||||
|
|
|
@ -26,13 +26,16 @@ import android.test.suitebuilder.annotation.SmallTest;
|
|||
public class ExpandableDictionaryTests extends AndroidTestCase {
|
||||
|
||||
private final static int UNIGRAM_FREQ = 50;
|
||||
// See UserBinaryDictionary for more information about this variable.
|
||||
// For tests, its actual value does not matter.
|
||||
private final static int SHORTCUT_FREQ = 14;
|
||||
|
||||
public void testAddWordAndGetWordFrequency() {
|
||||
final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER);
|
||||
|
||||
// Add words
|
||||
dict.addWord("abcde", "abcde", UNIGRAM_FREQ);
|
||||
dict.addWord("abcef", null, UNIGRAM_FREQ + 1);
|
||||
dict.addWord("abcde", "abcde", UNIGRAM_FREQ, SHORTCUT_FREQ);
|
||||
dict.addWord("abcef", null, UNIGRAM_FREQ + 1, 0);
|
||||
|
||||
// Check words
|
||||
assertFalse(dict.isValidWord("abcde"));
|
||||
|
@ -40,16 +43,16 @@ public class ExpandableDictionaryTests extends AndroidTestCase {
|
|||
assertTrue(dict.isValidWord("abcef"));
|
||||
assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef"));
|
||||
|
||||
dict.addWord("abc", null, UNIGRAM_FREQ + 2);
|
||||
dict.addWord("abc", null, UNIGRAM_FREQ + 2, 0);
|
||||
assertTrue(dict.isValidWord("abc"));
|
||||
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
|
||||
|
||||
// Add existing word with lower frequency
|
||||
dict.addWord("abc", null, UNIGRAM_FREQ);
|
||||
dict.addWord("abc", null, UNIGRAM_FREQ, 0);
|
||||
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
|
||||
|
||||
// Add existing word with higher frequency
|
||||
dict.addWord("abc", null, UNIGRAM_FREQ + 3);
|
||||
dict.addWord("abc", null, UNIGRAM_FREQ + 3, 0);
|
||||
assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -196,8 +196,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|||
final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
|
||||
final OnAddWordListener listener = new OnAddWordListener() {
|
||||
@Override
|
||||
public void setUnigram(final String word,
|
||||
final String shortcutTarget, final int frequency) {
|
||||
public void setUnigram(final String word, final String shortcutTarget,
|
||||
final int frequency, final int shortcutFreq) {
|
||||
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
|
||||
resultList.addBigram(null, word, (byte)frequency);
|
||||
}
|
||||
|
@ -220,8 +220,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|||
final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
|
||||
final OnAddWordListener listener2 = new OnAddWordListener() {
|
||||
@Override
|
||||
public void setUnigram(final String word,
|
||||
final String shortcutTarget, final int frequency) {
|
||||
public void setUnigram(final String word, final String shortcutTarget,
|
||||
final int frequency, final int shortcutFreq) {
|
||||
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
|
||||
resultList2.addBigram(null, word, (byte)frequency);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue