From c88f61215c5b9ca6e0cc3f776e3b7da19eec9cae Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Tue, 29 May 2012 19:07:22 +0900 Subject: [PATCH] Set level 1 as the initial value of the valid words Bug: 4192129 Change-Id: I867e78ce79c78977d08e8b66881a25b6fe5bf41f --- .../inputmethod/latin/AutoCorrection.java | 20 +++++++++++++++- .../inputmethod/latin/BinaryDictionary.java | 10 +++++--- .../android/inputmethod/latin/Dictionary.java | 5 ++++ .../latin/DictionaryCollection.java | 12 ++++++++++ .../latin/ExpandableDictionary.java | 11 ++++----- .../android/inputmethod/latin/LatinIME.java | 6 ++++- .../latin/UserHistoryDictionary.java | 11 +++++---- .../UserHistoryForgettingCurveUtils.java | 23 +++++++++++++++---- 8 files changed, 77 insertions(+), 21 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/AutoCorrection.java b/java/src/com/android/inputmethod/latin/AutoCorrection.java index 32b213e67..e0452483c 100644 --- a/java/src/com/android/inputmethod/latin/AutoCorrection.java +++ b/java/src/com/android/inputmethod/latin/AutoCorrection.java @@ -50,7 +50,7 @@ public class AutoCorrection { } public static boolean isValidWord(final ConcurrentHashMap dictionaries, - CharSequence word, boolean ignoreCase) { + CharSequence word, boolean ignoreCase) { if (TextUtils.isEmpty(word)) { return false; } @@ -74,6 +74,24 @@ public class AutoCorrection { return false; } + public static int getMaxFrequency(final ConcurrentHashMap dictionaries, + CharSequence word) { + if (TextUtils.isEmpty(word)) { + return Dictionary.NOT_A_PROBABILITY; + } + int maxFreq = -1; + for (final String key : dictionaries.keySet()) { + if (key.equals(Suggest.DICT_KEY_WHITELIST)) continue; + final Dictionary dictionary = dictionaries.get(key); + if (null == dictionary) continue; + final int tempFreq = dictionary.getFrequency(word); + if (tempFreq >= maxFreq) { + maxFreq = tempFreq; + } + } + return maxFreq; + } + public static boolean allowsToBeAutoCorrected( final ConcurrentHashMap 
dictionaries, final CharSequence word, final boolean ignoreCase) { diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index cb1069cfb..d0613bd72 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -201,10 +201,14 @@ public class BinaryDictionary extends Dictionary { @Override public boolean isValidWord(CharSequence word) { - if (word == null) return false; + return getFrequency(word) >= 0; + } + + @Override + public int getFrequency(CharSequence word) { + if (word == null) return -1; int[] chars = StringUtils.toCodePointArray(word.toString()); - final int freq = getFrequencyNative(mNativeDict, chars, chars.length); - return freq >= 0; + return getFrequencyNative(mNativeDict, chars, chars.length); } // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index 231e9ab81..7cd9bc2a8 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -31,6 +31,7 @@ public abstract class Dictionary { public static final int UNIGRAM = 0; public static final int BIGRAM = 1; + public static final int NOT_A_PROBABILITY = -1; /** * Interface to be implemented by classes requesting words to be fetched from the dictionary. * @see #getWords(WordComposer, CharSequence, WordCallback, ProximityInfo) @@ -84,6 +85,10 @@ public abstract class Dictionary { */ abstract public boolean isValidWord(CharSequence word); + public int getFrequency(CharSequence word) { + return NOT_A_PROBABILITY; + } + /** * Compares the contents of the character array with the typed word and returns true if they * are the same. 
diff --git a/java/src/com/android/inputmethod/latin/DictionaryCollection.java b/java/src/com/android/inputmethod/latin/DictionaryCollection.java index f3aa27a22..1a05fcd86 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryCollection.java +++ b/java/src/com/android/inputmethod/latin/DictionaryCollection.java @@ -70,6 +70,18 @@ public class DictionaryCollection extends Dictionary { return false; } + @Override + public int getFrequency(CharSequence word) { + int maxFreq = -1; + for (int i = mDictionaries.size() - 1; i >= 0; --i) { + final int tempFreq = mDictionaries.get(i).getFrequency(word); + if (tempFreq >= maxFreq) { + maxFreq = tempFreq; + } + } + return maxFreq; + } + public boolean isEmpty() { return mDictionaries.isEmpty(); } diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index 358cd4d4d..34a92fd30 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -84,8 +84,7 @@ public class ExpandableDictionary extends Dictionary { protected interface NextWord { public Node getWordNode(); public int getFrequency(); - /** FcValue is a bit set */ - public int getFcValue(); + public ForgettingCurveParams getFcParams(); public int notifyTypedAgainAndGetFrequency(); } @@ -108,8 +107,8 @@ public class ExpandableDictionary extends Dictionary { } @Override - public int getFcValue() { - return mFrequency; + public ForgettingCurveParams getFcParams() { + return null; } @Override @@ -138,8 +137,8 @@ public class ExpandableDictionary extends Dictionary { } @Override - public int getFcValue() { - return mFcp.getFc(); + public ForgettingCurveParams getFcParams() { + return mFcp; } @Override diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 52088812d..38549436b 100644 --- 
a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -2093,8 +2093,12 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } else { secondWord = suggestion.toString(); } + // We demote unrecognized word and words with 0-frequency (assuming they would be + // profanity etc.) by specifying them as "invalid". + final int maxFreq = AutoCorrection.getMaxFrequency( + mSuggest.getUnigramDictionaries(), suggestion); mUserHistoryDictionary.addToUserHistory(null == prevWord ? null : prevWord.toString(), - secondWord); + secondWord, maxFreq > 0); return prevWord; } return null; diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java index d5163f2a1..c8ad40b12 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java @@ -148,7 +148,7 @@ public class UserHistoryDictionary extends ExpandableDictionary { * context, as in beginning of a sentence for example. * The second word may not be null (a NullPointerException would be thrown). 
*/ - public int addToUserHistory(final String word1, String word2) { + public int addToUserHistory(final String word1, String word2, boolean isValid) { super.addWord(word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); // Do not insert a word as a bigram of itself if (word2.equals(word1)) { @@ -158,7 +158,7 @@ public class UserHistoryDictionary extends ExpandableDictionary { if (null == word1) { freq = FREQUENCY_FOR_TYPED; } else { - freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams()); + freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams(isValid)); } synchronized (mPendingWritesLock) { mBigramList.addBigram(word1, word2); @@ -416,10 +416,11 @@ public class UserHistoryDictionary extends ExpandableDictionary { } else { final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); if (nw != null) { - final int tempFreq = nw.getFcValue(); - // TODO: Check whether the word is valid or not + final ForgettingCurveParams fcp = nw.getFcParams(); + final int tempFreq = fcp.getFc(); + final boolean isValid = fcp.isValid(); if (UserHistoryForgettingCurveUtils.needsToSave( - (byte)tempFreq, false, addLevel0Bigram)) { + (byte)tempFreq, isValid, addLevel0Bigram)) { freq = tempFreq; } else { freq = -1; diff --git a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java index feb1d0029..9cd8c6778 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java @@ -38,26 +38,39 @@ public class UserHistoryForgettingCurveUtils { public static class ForgettingCurveParams { private byte mFc; long mLastTouchedTime = 0; + private final boolean mIsValid; private void updateLastTouchedTime() { mLastTouchedTime = System.currentTimeMillis(); } - public ForgettingCurveParams() { - // TODO: Check whether 
this word is valid or not - this(System.currentTimeMillis()); + public ForgettingCurveParams(boolean isValid) { + this(System.currentTimeMillis(), isValid); } - private ForgettingCurveParams(long now) { - this((int)pushCount((byte)0, false), now, now); + private ForgettingCurveParams(long now, boolean isValid) { + this((int)pushCount((byte)0, isValid), now, now, isValid); } + /** This constructor is called when the user history bigram dictionary is being restored. */ public ForgettingCurveParams(int fc, long now, long last) { + // All words with level >= 1 had been saved. + // Invalid words with level == 0 had been saved. + // Valid words with level == 0 had *not* been saved. + this(fc, now, last, fcToLevel((byte)fc) > 0); + } + + private ForgettingCurveParams(int fc, long now, long last, boolean isValid) { + mIsValid = isValid; mFc = (byte)fc; mLastTouchedTime = last; updateElapsedTime(now); } + public boolean isValid() { + return mIsValid; + } + public byte getFc() { updateElapsedTime(System.currentTimeMillis()); return mFc;