am 408fa1ac: am 961e676b: Merge "Set level 1 as the initial value of the valid words" into jb-dev

* commit '408fa1ac48a3eb6f378274d5dd0e490da0f453ae':
  Set level 1 as the initial value of the valid words
This commit is contained in:
Satoshi Kataoka 2012-05-29 04:49:33 -07:00 committed by Android Git Automerger
commit c5d4362b59
8 changed files with 77 additions and 21 deletions

View file

@ -50,7 +50,7 @@ public class AutoCorrection {
} }
public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries, public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries,
CharSequence word, boolean ignoreCase) { CharSequence word, boolean ignoreCase) {
if (TextUtils.isEmpty(word)) { if (TextUtils.isEmpty(word)) {
return false; return false;
} }
@ -74,6 +74,24 @@ public class AutoCorrection {
return false; return false;
} }
/**
 * Looks up {@code word} in every dictionary of the given map except the whitelist and
 * returns the highest frequency any of them reports.
 *
 * @param dictionaries the dictionaries to query, keyed by dictionary type; the entry
 *        stored under {@link Suggest#DICT_KEY_WHITELIST} is skipped
 * @param word the word to look up
 * @return the largest value returned by {@link Dictionary#getFrequency(CharSequence)}
 *         among the queried dictionaries, or {@link Dictionary#NOT_A_PROBABILITY} when
 *         {@code word} is empty according to {@code TextUtils.isEmpty} or when no
 *         dictionary reports anything higher
 */
public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
        CharSequence word) {
    if (TextUtils.isEmpty(word)) {
        return Dictionary.NOT_A_PROBABILITY;
    }
    // Start from the "unknown word" sentinel so that a word no dictionary knows stays at it.
    int highest = Dictionary.NOT_A_PROBABILITY;
    for (final String dictKey : dictionaries.keySet()) {
        if (!dictKey.equals(Suggest.DICT_KEY_WHITELIST)) {
            final Dictionary candidate = dictionaries.get(dictKey);
            // The lookup can come back null, e.g. if the entry is gone by the time we fetch it.
            if (candidate != null) {
                highest = Math.max(highest, candidate.getFrequency(word));
            }
        }
    }
    return highest;
}
public static boolean allowsToBeAutoCorrected( public static boolean allowsToBeAutoCorrected(
final ConcurrentHashMap<String, Dictionary> dictionaries, final ConcurrentHashMap<String, Dictionary> dictionaries,
final CharSequence word, final boolean ignoreCase) { final CharSequence word, final boolean ignoreCase) {

View file

@ -201,10 +201,14 @@ public class BinaryDictionary extends Dictionary {
@Override @Override
public boolean isValidWord(CharSequence word) { public boolean isValidWord(CharSequence word) {
if (word == null) return false; return getFrequency(word) >= 0;
}
@Override
public int getFrequency(CharSequence word) {
if (word == null) return -1;
int[] chars = StringUtils.toCodePointArray(word.toString()); int[] chars = StringUtils.toCodePointArray(word.toString());
final int freq = getFrequencyNative(mNativeDict, chars, chars.length); return getFrequencyNative(mNativeDict, chars, chars.length);
return freq >= 0;
} }
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni

View file

@ -31,6 +31,7 @@ public abstract class Dictionary {
public static final int UNIGRAM = 0; public static final int UNIGRAM = 0;
public static final int BIGRAM = 1; public static final int BIGRAM = 1;
public static final int NOT_A_PROBABILITY = -1;
/** /**
* Interface to be implemented by classes requesting words to be fetched from the dictionary. * Interface to be implemented by classes requesting words to be fetched from the dictionary.
* @see #getWords(WordComposer, CharSequence, WordCallback, ProximityInfo) * @see #getWords(WordComposer, CharSequence, WordCallback, ProximityInfo)
@ -84,6 +85,10 @@ public abstract class Dictionary {
*/ */
abstract public boolean isValidWord(CharSequence word); abstract public boolean isValidWord(CharSequence word);
/**
 * Returns the frequency of the given word in this dictionary.
 *
 * This base implementation does not look anything up: it returns
 * {@link #NOT_A_PROBABILITY} for every word. Subclasses that can report a frequency
 * override it.
 *
 * @param word the word to look up (ignored by this base implementation)
 * @return {@link #NOT_A_PROBABILITY}
 */
public int getFrequency(CharSequence word) {
return NOT_A_PROBABILITY;
}
/** /**
* Compares the contents of the character array with the typed word and returns true if they * Compares the contents of the character array with the typed word and returns true if they
* are the same. * are the same.

View file

@ -70,6 +70,18 @@ public class DictionaryCollection extends Dictionary {
return false; return false;
} }
/**
 * Asks every dictionary in this collection for the frequency of {@code word} and returns
 * the highest answer.
 *
 * @param word the word to look up
 * @return the largest frequency reported by any contained dictionary, or -1 when the
 *         collection is empty or no dictionary reports anything higher than -1
 */
@Override
public int getFrequency(CharSequence word) {
    int best = -1;
    // Walk from the last dictionary down to the first.
    int index = mDictionaries.size();
    while (--index >= 0) {
        best = Math.max(best, mDictionaries.get(index).getFrequency(word));
    }
    return best;
}
public boolean isEmpty() { public boolean isEmpty() {
return mDictionaries.isEmpty(); return mDictionaries.isEmpty();
} }

View file

@ -84,8 +84,7 @@ public class ExpandableDictionary extends Dictionary {
protected interface NextWord { protected interface NextWord {
public Node getWordNode(); public Node getWordNode();
public int getFrequency(); public int getFrequency();
/** FcValue is a bit set */ public ForgettingCurveParams getFcParams();
public int getFcValue();
public int notifyTypedAgainAndGetFrequency(); public int notifyTypedAgainAndGetFrequency();
} }
@ -108,8 +107,8 @@ public class ExpandableDictionary extends Dictionary {
} }
@Override @Override
public int getFcValue() { public ForgettingCurveParams getFcParams() {
return mFrequency; return null;
} }
@Override @Override
@ -138,8 +137,8 @@ public class ExpandableDictionary extends Dictionary {
} }
@Override @Override
public int getFcValue() { public ForgettingCurveParams getFcParams() {
return mFcp.getFc(); return mFcp;
} }
@Override @Override

View file

@ -2104,8 +2104,12 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
} else { } else {
secondWord = suggestion.toString(); secondWord = suggestion.toString();
} }
// We demote unrecognized words and words with 0-frequency (assuming they would be
// profanity etc.) by specifying them as "invalid".
final int maxFreq = AutoCorrection.getMaxFrequency(
mSuggest.getUnigramDictionaries(), suggestion);
mUserHistoryDictionary.addToUserHistory(null == prevWord ? null : prevWord.toString(), mUserHistoryDictionary.addToUserHistory(null == prevWord ? null : prevWord.toString(),
secondWord); secondWord, maxFreq > 0);
return prevWord; return prevWord;
} }
return null; return null;

View file

@ -148,7 +148,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
* context, as in beginning of a sentence for example. * context, as in beginning of a sentence for example.
* The second word may not be null (a NullPointerException would be thrown). * The second word may not be null (a NullPointerException would be thrown).
*/ */
public int addToUserHistory(final String word1, String word2) { public int addToUserHistory(final String word1, String word2, boolean isValid) {
super.addWord(word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); super.addWord(word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
// Do not insert a word as a bigram of itself // Do not insert a word as a bigram of itself
if (word2.equals(word1)) { if (word2.equals(word1)) {
@ -158,7 +158,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
if (null == word1) { if (null == word1) {
freq = FREQUENCY_FOR_TYPED; freq = FREQUENCY_FOR_TYPED;
} else { } else {
freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams()); freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams(isValid));
} }
synchronized (mPendingWritesLock) { synchronized (mPendingWritesLock) {
mBigramList.addBigram(word1, word2); mBigramList.addBigram(word1, word2);
@ -416,10 +416,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
} else { } else {
final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
if (nw != null) { if (nw != null) {
final int tempFreq = nw.getFcValue(); final ForgettingCurveParams fcp = nw.getFcParams();
// TODO: Check whether the word is valid or not final int tempFreq = fcp.getFc();
final boolean isValid = fcp.isValid();
if (UserHistoryForgettingCurveUtils.needsToSave( if (UserHistoryForgettingCurveUtils.needsToSave(
(byte)tempFreq, false, addLevel0Bigram)) { (byte)tempFreq, isValid, addLevel0Bigram)) {
freq = tempFreq; freq = tempFreq;
} else { } else {
freq = -1; freq = -1;

View file

@ -38,26 +38,39 @@ public class UserHistoryForgettingCurveUtils {
public static class ForgettingCurveParams { public static class ForgettingCurveParams {
private byte mFc; private byte mFc;
long mLastTouchedTime = 0; long mLastTouchedTime = 0;
private final boolean mIsValid;
private void updateLastTouchedTime() { private void updateLastTouchedTime() {
mLastTouchedTime = System.currentTimeMillis(); mLastTouchedTime = System.currentTimeMillis();
} }
public ForgettingCurveParams() { public ForgettingCurveParams(boolean isValid) {
// TODO: Check whether this word is valid or not this(System.currentTimeMillis(), isValid);
this(System.currentTimeMillis());
} }
private ForgettingCurveParams(long now) { private ForgettingCurveParams(long now, boolean isValid) {
this((int)pushCount((byte)0, false), now, now); this((int)pushCount((byte)0, isValid), now, now, isValid);
} }
/** This constructor is called when the user history bigram dictionary is being restored. */
public ForgettingCurveParams(int fc, long now, long last) { public ForgettingCurveParams(int fc, long now, long last) {
// All words with level >= 1 had been saved.
// Invalid words with level == 0 had been saved.
// Valid words with level == 0 had *not* been saved.
this(fc, now, last, fcToLevel((byte)fc) > 0);
}
private ForgettingCurveParams(int fc, long now, long last, boolean isValid) {
mIsValid = isValid;
mFc = (byte)fc; mFc = (byte)fc;
mLastTouchedTime = last; mLastTouchedTime = last;
updateElapsedTime(now); updateElapsedTime(now);
} }
public boolean isValid() {
return mIsValid;
}
public byte getFc() { public byte getFc() {
updateElapsedTime(System.currentTimeMillis()); updateElapsedTime(System.currentTimeMillis());
return mFc; return mFc;