Set the shortcut frequency correctly.

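14 is the right value: shortcut frequencies range from 0 to 15, where 15 means
whitelist. User dictionary shortcuts should not auto-correct, so they get the
highest frequency that does not, i.e. 14.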

Bug: 11076722
Change-Id: I95d404b540f7fbe4932d1f8498cde23f1df0314f
main
Jean Chalard 2013-10-04 23:26:18 +09:00
parent a8f4efd013
commit f3204eebb1
11 changed files with 92 additions and 31 deletions

View File

@ -41,8 +41,17 @@ abstract public class AbstractDictionaryWriter extends Dictionary {
abstract public void clear(); abstract public void clear();
/**
* Add a unigram with an optional shortcut to the dictionary.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
abstract public void addUnigramWord(final String word, final String shortcutTarget, abstract public void addUnigramWord(final String word, final String shortcutTarget,
final int frequency, final boolean isNotAWord); final int frequency, final int shortcutFreq, final boolean isNotAWord);
// TODO: Remove lastModifiedTime after making binary dictionary support forgetting curve. // TODO: Remove lastModifiedTime after making binary dictionary support forgetting curve.
abstract public void addBigramWords(final String word0, final String word1, abstract public void addBigramWords(final String word0, final String word1,

View File

@ -127,7 +127,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "loadAccountVocabulary: " + word); Log.d(TAG, "loadAccountVocabulary: " + word);
} }
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, 0 /* shortcutFreq */,
false /* isNotAWord */); false /* isNotAWord */);
} }
} }
@ -213,7 +213,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord); Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord);
} }
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
false /* isNotAWord */); 0 /* shortcutFreq */, false /* isNotAWord */);
if (!TextUtils.isEmpty(prevWord)) { if (!TextUtils.isEmpty(prevWord)) {
if (mUseFirstLastBigrams) { if (mUseFirstLastBigrams) {
super.addBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM, super.addBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM,

View File

@ -62,13 +62,13 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
// considering performance regression. // considering performance regression.
@Override @Override
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
final boolean isNotAWord) { final int shortcutFreq, final boolean isNotAWord) {
if (shortcutTarget == null) { if (shortcutTarget == null) {
mFusionDictionary.add(word, frequency, null, isNotAWord); mFusionDictionary.add(word, frequency, null, isNotAWord);
} else { } else {
// TODO: Do this in the subclass, with this class taking an arraylist. // TODO: Do this in the subclass, with this class taking an arraylist.
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList(); final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
shortcutTargets.add(new WeightedString(shortcutTarget, frequency)); shortcutTargets.add(new WeightedString(shortcutTarget, shortcutFreq));
mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord); mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord);
} }
} }

View File

@ -261,10 +261,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
/** /**
* Adds a word unigram to the dictionary. Used for loading a dictionary. * Adds a word unigram to the dictionary. Used for loading a dictionary.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/ */
protected void addWord(final String word, final String shortcutTarget, protected void addWord(final String word, final String shortcutTarget,
final int frequency, final boolean isNotAWord) { final int frequency, final int shortcutFreq, final boolean isNotAWord) {
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord); mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, isNotAWord);
} }
/** /**
@ -313,7 +319,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* Dynamically adds a word unigram to the dictionary. May overwrite an existing entry. * Dynamically adds a word unigram to the dictionary. May overwrite an existing entry.
*/ */
protected void addWordDynamically(final String word, final String shortcutTarget, protected void addWordDynamically(final String word, final String shortcutTarget,
final int frequency, final boolean isNotAWord) { final int frequency, final int shortcutFreq, final boolean isNotAWord) {
if (!mIsUpdatable) { if (!mIsUpdatable) {
Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename); Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename);
return; return;
@ -326,7 +332,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
mBinaryDictionary.addUnigramWord(word, frequency); mBinaryDictionary.addUnigramWord(word, frequency);
} else { } else {
// TODO: Remove. // TODO: Remove.
mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord); mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq,
isNotAWord);
} }
} }
}); });

View File

@ -156,15 +156,36 @@ public class ExpandableDictionary extends Dictionary {
return Constants.DICTIONARY_MAX_WORD_LENGTH; return Constants.DICTIONARY_MAX_WORD_LENGTH;
} }
public void addWord(final String word, final String shortcutTarget, final int frequency) { /**
* Add a word with an optional shortcut to the dictionary.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
*/
public void addWord(final String word, final String shortcutTarget, final int frequency,
final int shortcutFreq) {
if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) { if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
return; return;
} }
addWordRec(mRoots, word, 0, shortcutTarget, frequency, null); addWordRec(mRoots, word, 0, shortcutTarget, frequency, shortcutFreq, null);
} }
/**
* Add a word, recursively searching for its correct place in the trie tree.
* @param children The node to recursively search for addition. Initially, the root of the tree.
* @param word The word to add.
* @param depth The current depth in the tree.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param parentNode The parent node, for up linking. Initially null, as the root has no parent.
*/
private void addWordRec(final NodeArray children, final String word, final int depth, private void addWordRec(final NodeArray children, final String word, final int depth,
final String shortcutTarget, final int frequency, final Node parentNode) { final String shortcutTarget, final int frequency, final int shortcutFreq,
final Node parentNode) {
final int wordLength = word.length(); final int wordLength = word.length();
if (wordLength <= depth) return; if (wordLength <= depth) return;
final char c = word.charAt(depth); final char c = word.charAt(depth);
@ -204,7 +225,8 @@ public class ExpandableDictionary extends Dictionary {
if (childNode.mChildren == null) { if (childNode.mChildren == null) {
childNode.mChildren = new NodeArray(); childNode.mChildren = new NodeArray();
} }
addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode); addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, shortcutFreq,
childNode);
} }
@Override @Override

View File

@ -47,6 +47,9 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
private static final String USER_DICTIONARY_ALL_LANGUAGES = ""; private static final String USER_DICTIONARY_ALL_LANGUAGES = "";
private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250; private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250;
private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160; private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160;
// Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries
// to auto-correct, so we set this to the highest frequency that won't, i.e. 14.
private static final int USER_DICT_SHORTCUT_FREQUENCY = 14;
// TODO: use Words.SHORTCUT when we target JellyBean or above // TODO: use Words.SHORTCUT when we target JellyBean or above
final static String SHORTCUT = "shortcut"; final static String SHORTCUT = "shortcut";
@ -243,10 +246,12 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency); final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency);
// Safeguard against adding really long words. // Safeguard against adding really long words.
if (word.length() < MAX_WORD_LENGTH) { if (word.length() < MAX_WORD_LENGTH) {
super.addWord(word, null, adjustedFrequency, false /* isNotAWord */); super.addWord(word, null, adjustedFrequency, 0 /* shortcutFreq */,
false /* isNotAWord */);
} }
if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) { if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) {
super.addWord(shortcut, word, adjustedFrequency, true /* isNotAWord */); super.addWord(shortcut, word, adjustedFrequency, USER_DICT_SHORTCUT_FREQUENCY,
true /* isNotAWord */);
} }
cursor.moveToNext(); cursor.moveToNext();
} }

View File

@ -138,7 +138,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ? final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
(isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) : (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
FREQUENCY_FOR_TYPED; FREQUENCY_FOR_TYPED;
addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency, addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */,
false /* isNotAWord */); false /* isNotAWord */);
// Do not insert a word as a bigram of itself // Do not insert a word as a bigram of itself
if (word1.equals(word0)) { if (word1.equals(word0)) {
@ -171,11 +171,11 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
final OnAddWordListener listener = new OnAddWordListener() { final OnAddWordListener listener = new OnAddWordListener() {
@Override @Override
public void setUnigram(final String word, final String shortcutTarget, public void setUnigram(final String word, final String shortcutTarget,
final int frequency) { final int frequency, final int shortcutFreq) {
if (DBG_SAVE_RESTORE) { if (DBG_SAVE_RESTORE) {
Log.d(TAG, "load unigram: " + word + "," + frequency); Log.d(TAG, "load unigram: " + word + "," + frequency);
} }
addWord(word, shortcutTarget, frequency, false /* isNotAWord */); addWord(word, shortcutTarget, frequency, shortcutFreq, false /* isNotAWord */);
++profTotalCount[0]; ++profTotalCount[0];
} }

View File

@ -75,15 +75,21 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr
/** /**
* Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes * Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes
* are done to update the binary dictionary. * are done to update the binary dictionary.
* @param word The word to add.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this unigram.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
* if shortcutTarget is null.
* @param isNotAWord true if this is not a word, i.e. shortcut only.
*/ */
@Override @Override
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
final boolean isNotAWord) { final int shortcutFreq, final boolean isNotAWord) {
if (mBigramList.size() > mMaxHistoryBigrams * 2) { if (mBigramList.size() > mMaxHistoryBigrams * 2) {
// Too many entries: just stop adding new vocabulary and wait next refresh. // Too many entries: just stop adding new vocabulary and wait next refresh.
return; return;
} }
mExpandableDictionary.addWord(word, shortcutTarget, frequency); mExpandableDictionary.addWord(word, shortcutTarget, frequency, shortcutFreq);
mBigramList.addBigram(null, word, (byte)frequency); mBigramList.addBigram(null, word, (byte)frequency);
} }

View File

@ -49,7 +49,16 @@ public final class UserHistoryDictIOUtils {
private static final String LAST_UPDATED_TIME_KEY = "date"; private static final String LAST_UPDATED_TIME_KEY = "date";
public interface OnAddWordListener { public interface OnAddWordListener {
public void setUnigram(final String word, final String shortcutTarget, final int frequency); /**
* Callback to be notified when a word is added to the dictionary.
* @param word The added word.
* @param shortcutTarget A shortcut target for this word, or null if none.
* @param frequency The frequency for this word.
* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
* Unspecified if shortcutTarget is null - do not rely on its value.
*/
public void setUnigram(final String word, final String shortcutTarget, final int frequency,
final int shortcutFreq);
public void setBigram(final String word1, final String word2, final int frequency); public void setBigram(final String word1, final String word2, final int frequency);
} }
@ -153,7 +162,7 @@ public final class UserHistoryDictIOUtils {
for (Entry<Integer, String> entry : unigrams.entrySet()) { for (Entry<Integer, String> entry : unigrams.entrySet()) {
final String word1 = entry.getValue(); final String word1 = entry.getValue();
final int unigramFrequency = frequencies.get(entry.getKey()); final int unigramFrequency = frequencies.get(entry.getKey());
to.setUnigram(word1, null, unigramFrequency); to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey()); final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
if (attrList != null) { if (attrList != null) {
for (final PendingAttribute attr : attrList) { for (final PendingAttribute attr : attrList) {

View File

@ -26,13 +26,16 @@ import android.test.suitebuilder.annotation.SmallTest;
public class ExpandableDictionaryTests extends AndroidTestCase { public class ExpandableDictionaryTests extends AndroidTestCase {
private final static int UNIGRAM_FREQ = 50; private final static int UNIGRAM_FREQ = 50;
// See UserBinaryDictionary for more information about this variable.
// For tests, its actual value does not matter.
private final static int SHORTCUT_FREQ = 14;
public void testAddWordAndGetWordFrequency() { public void testAddWordAndGetWordFrequency() {
final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER); final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER);
// Add words // Add words
dict.addWord("abcde", "abcde", UNIGRAM_FREQ); dict.addWord("abcde", "abcde", UNIGRAM_FREQ, SHORTCUT_FREQ);
dict.addWord("abcef", null, UNIGRAM_FREQ + 1); dict.addWord("abcef", null, UNIGRAM_FREQ + 1, 0);
// Check words // Check words
assertFalse(dict.isValidWord("abcde")); assertFalse(dict.isValidWord("abcde"));
@ -40,16 +43,16 @@ public class ExpandableDictionaryTests extends AndroidTestCase {
assertTrue(dict.isValidWord("abcef")); assertTrue(dict.isValidWord("abcef"));
assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef")); assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef"));
dict.addWord("abc", null, UNIGRAM_FREQ + 2); dict.addWord("abc", null, UNIGRAM_FREQ + 2, 0);
assertTrue(dict.isValidWord("abc")); assertTrue(dict.isValidWord("abc"));
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc")); assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
// Add existing word with lower frequency // Add existing word with lower frequency
dict.addWord("abc", null, UNIGRAM_FREQ); dict.addWord("abc", null, UNIGRAM_FREQ, 0);
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc")); assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
// Add existing word with higher frequency // Add existing word with higher frequency
dict.addWord("abc", null, UNIGRAM_FREQ + 3); dict.addWord("abc", null, UNIGRAM_FREQ + 3, 0);
assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc")); assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc"));
} }
} }

View File

@ -196,8 +196,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList(); final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
final OnAddWordListener listener = new OnAddWordListener() { final OnAddWordListener listener = new OnAddWordListener() {
@Override @Override
public void setUnigram(final String word, public void setUnigram(final String word, final String shortcutTarget,
final String shortcutTarget, final int frequency) { final int frequency, final int shortcutFreq) {
Log.d(TAG, "in: setUnigram: " + word + "," + frequency); Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
resultList.addBigram(null, word, (byte)frequency); resultList.addBigram(null, word, (byte)frequency);
} }
@ -220,8 +220,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList(); final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
final OnAddWordListener listener2 = new OnAddWordListener() { final OnAddWordListener listener2 = new OnAddWordListener() {
@Override @Override
public void setUnigram(final String word, public void setUnigram(final String word, final String shortcutTarget,
final String shortcutTarget, final int frequency) { final int frequency, final int shortcutFreq) {
Log.d(TAG, "in: setUnigram: " + word + "," + frequency); Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
resultList2.addBigram(null, word, (byte)frequency); resultList2.addBigram(null, word, (byte)frequency);
} }