* commit '408fa1ac48a3eb6f378274d5dd0e490da0f453ae': Set level 1 as the initial value of the valid words
This commit is contained in:
commit
c5d4362b59
8 changed files with 77 additions and 21 deletions
|
@ -50,7 +50,7 @@ public class AutoCorrection {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries,
|
public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries,
|
||||||
CharSequence word, boolean ignoreCase) {
|
CharSequence word, boolean ignoreCase) {
|
||||||
if (TextUtils.isEmpty(word)) {
|
if (TextUtils.isEmpty(word)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -74,6 +74,24 @@ public class AutoCorrection {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the highest frequency reported for {@code word} by the given dictionaries.
 *
 * <p>If {@code TextUtils.isEmpty(word)} is true, {@code Dictionary.NOT_A_PROBABILITY} is
 * returned without consulting any dictionary. Otherwise every key of {@code dictionaries}
 * is visited; the entry keyed by {@code Suggest.DICT_KEY_WHITELIST} is skipped, as is any
 * key whose lookup yields {@code null}. Each remaining dictionary is queried through
 * {@code getFrequency(word)} and the largest value seen is kept.
 *
 * @param dictionaries map from dictionary key to dictionary instance
 * @param word the word to look up
 * @return the maximum frequency found, or -1 (the starting value) when no dictionary
 *         was consulted or none reported a frequency above -1
 */
public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
|
||||||
|
CharSequence word) {
|
||||||
|
if (TextUtils.isEmpty(word)) {
|
||||||
|
return Dictionary.NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
int maxFreq = -1;
|
||||||
|
for (final String key : dictionaries.keySet()) {
|
||||||
|
if (key.equals(Suggest.DICT_KEY_WHITELIST)) continue;
|
||||||
|
final Dictionary dictionary = dictionaries.get(key);
|
||||||
|
if (null == dictionary) continue;
|
||||||
|
final int tempFreq = dictionary.getFrequency(word);
|
||||||
|
if (tempFreq >= maxFreq) {
|
||||||
|
maxFreq = tempFreq;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return maxFreq;
|
||||||
|
}
|
||||||
|
|
||||||
public static boolean allowsToBeAutoCorrected(
|
public static boolean allowsToBeAutoCorrected(
|
||||||
final ConcurrentHashMap<String, Dictionary> dictionaries,
|
final ConcurrentHashMap<String, Dictionary> dictionaries,
|
||||||
final CharSequence word, final boolean ignoreCase) {
|
final CharSequence word, final boolean ignoreCase) {
|
||||||
|
|
|
@ -201,10 +201,14 @@ public class BinaryDictionary extends Dictionary {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isValidWord(CharSequence word) {
|
public boolean isValidWord(CharSequence word) {
|
||||||
if (word == null) return false;
|
return getFrequency(word) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getFrequency(CharSequence word) {
|
||||||
|
if (word == null) return -1;
|
||||||
int[] chars = StringUtils.toCodePointArray(word.toString());
|
int[] chars = StringUtils.toCodePointArray(word.toString());
|
||||||
final int freq = getFrequencyNative(mNativeDict, chars, chars.length);
|
return getFrequencyNative(mNativeDict, chars, chars.length);
|
||||||
return freq >= 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
||||||
|
|
|
@ -31,6 +31,7 @@ public abstract class Dictionary {
|
||||||
public static final int UNIGRAM = 0;
|
public static final int UNIGRAM = 0;
|
||||||
public static final int BIGRAM = 1;
|
public static final int BIGRAM = 1;
|
||||||
|
|
||||||
|
public static final int NOT_A_PROBABILITY = -1;
|
||||||
/**
|
/**
|
||||||
* Interface to be implemented by classes requesting words to be fetched from the dictionary.
|
* Interface to be implemented by classes requesting words to be fetched from the dictionary.
|
||||||
* @see #getWords(WordComposer, CharSequence, WordCallback, ProximityInfo)
|
* @see #getWords(WordComposer, CharSequence, WordCallback, ProximityInfo)
|
||||||
|
@ -84,6 +85,10 @@ public abstract class Dictionary {
|
||||||
*/
|
*/
|
||||||
abstract public boolean isValidWord(CharSequence word);
|
abstract public boolean isValidWord(CharSequence word);
|
||||||
|
|
||||||
|
/**
 * Default frequency lookup: always returns {@code NOT_A_PROBABILITY}, regardless of
 * {@code word}. Subclasses that can report a frequency override this method.
 *
 * @param word the word to look up (unused by this default implementation)
 * @return {@code NOT_A_PROBABILITY}
 */
public int getFrequency(CharSequence word) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compares the contents of the character array with the typed word and returns true if they
|
* Compares the contents of the character array with the typed word and returns true if they
|
||||||
* are the same.
|
* are the same.
|
||||||
|
|
|
@ -70,6 +70,18 @@ public class DictionaryCollection extends Dictionary {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the highest frequency that any dictionary in this collection reports for
 * {@code word}.
 *
 * <p>The member dictionaries in {@code mDictionaries} are visited from the last index
 * down to the first, and the largest {@code getFrequency(word)} result is kept.
 *
 * @param word the word to look up
 * @return the maximum frequency found, or -1 (the starting value) when the collection
 *         is empty or no member reports a frequency above -1
 */
@Override
|
||||||
|
public int getFrequency(CharSequence word) {
|
||||||
|
int maxFreq = -1;
|
||||||
|
for (int i = mDictionaries.size() - 1; i >= 0; --i) {
|
||||||
|
final int tempFreq = mDictionaries.get(i).getFrequency(word);
|
||||||
|
if (tempFreq >= maxFreq) {
|
||||||
|
maxFreq = tempFreq;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return maxFreq;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return mDictionaries.isEmpty();
|
return mDictionaries.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,8 +84,7 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
protected interface NextWord {
|
protected interface NextWord {
|
||||||
public Node getWordNode();
|
public Node getWordNode();
|
||||||
public int getFrequency();
|
public int getFrequency();
|
||||||
/** FcValue is a bit set */
|
public ForgettingCurveParams getFcParams();
|
||||||
public int getFcValue();
|
|
||||||
public int notifyTypedAgainAndGetFrequency();
|
public int notifyTypedAgainAndGetFrequency();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,8 +107,8 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getFcValue() {
|
public ForgettingCurveParams getFcParams() {
|
||||||
return mFrequency;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -138,8 +137,8 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getFcValue() {
|
public ForgettingCurveParams getFcParams() {
|
||||||
return mFcp.getFc();
|
return mFcp;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -2104,8 +2104,12 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
||||||
} else {
|
} else {
|
||||||
secondWord = suggestion.toString();
|
secondWord = suggestion.toString();
|
||||||
}
|
}
|
||||||
|
// We demote unrecognized words and words with 0-frequency (assuming they would be
|
||||||
|
// profanity etc.) by specifying them as "invalid".
|
||||||
|
final int maxFreq = AutoCorrection.getMaxFrequency(
|
||||||
|
mSuggest.getUnigramDictionaries(), suggestion);
|
||||||
mUserHistoryDictionary.addToUserHistory(null == prevWord ? null : prevWord.toString(),
|
mUserHistoryDictionary.addToUserHistory(null == prevWord ? null : prevWord.toString(),
|
||||||
secondWord);
|
secondWord, maxFreq > 0);
|
||||||
return prevWord;
|
return prevWord;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -148,7 +148,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
* context, as in beginning of a sentence for example.
|
* context, as in beginning of a sentence for example.
|
||||||
* The second word may not be null (a NullPointerException would be thrown).
|
* The second word may not be null (a NullPointerException would be thrown).
|
||||||
*/
|
*/
|
||||||
public int addToUserHistory(final String word1, String word2) {
|
public int addToUserHistory(final String word1, String word2, boolean isValid) {
|
||||||
super.addWord(word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
|
super.addWord(word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
|
||||||
// Do not insert a word as a bigram of itself
|
// Do not insert a word as a bigram of itself
|
||||||
if (word2.equals(word1)) {
|
if (word2.equals(word1)) {
|
||||||
|
@ -158,7 +158,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
if (null == word1) {
|
if (null == word1) {
|
||||||
freq = FREQUENCY_FOR_TYPED;
|
freq = FREQUENCY_FOR_TYPED;
|
||||||
} else {
|
} else {
|
||||||
freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
|
freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams(isValid));
|
||||||
}
|
}
|
||||||
synchronized (mPendingWritesLock) {
|
synchronized (mPendingWritesLock) {
|
||||||
mBigramList.addBigram(word1, word2);
|
mBigramList.addBigram(word1, word2);
|
||||||
|
@ -416,10 +416,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
} else {
|
} else {
|
||||||
final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
|
final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
|
||||||
if (nw != null) {
|
if (nw != null) {
|
||||||
final int tempFreq = nw.getFcValue();
|
final ForgettingCurveParams fcp = nw.getFcParams();
|
||||||
// TODO: Check whether the word is valid or not
|
final int tempFreq = fcp.getFc();
|
||||||
|
final boolean isValid = fcp.isValid();
|
||||||
if (UserHistoryForgettingCurveUtils.needsToSave(
|
if (UserHistoryForgettingCurveUtils.needsToSave(
|
||||||
(byte)tempFreq, false, addLevel0Bigram)) {
|
(byte)tempFreq, isValid, addLevel0Bigram)) {
|
||||||
freq = tempFreq;
|
freq = tempFreq;
|
||||||
} else {
|
} else {
|
||||||
freq = -1;
|
freq = -1;
|
||||||
|
|
|
@ -38,26 +38,39 @@ public class UserHistoryForgettingCurveUtils {
|
||||||
public static class ForgettingCurveParams {
|
public static class ForgettingCurveParams {
|
||||||
private byte mFc;
|
private byte mFc;
|
||||||
long mLastTouchedTime = 0;
|
long mLastTouchedTime = 0;
|
||||||
|
private final boolean mIsValid;
|
||||||
|
|
||||||
private void updateLastTouchedTime() {
|
private void updateLastTouchedTime() {
|
||||||
mLastTouchedTime = System.currentTimeMillis();
|
mLastTouchedTime = System.currentTimeMillis();
|
||||||
}
|
}
|
||||||
|
|
||||||
public ForgettingCurveParams() {
|
public ForgettingCurveParams(boolean isValid) {
|
||||||
// TODO: Check whether this word is valid or not
|
this(System.currentTimeMillis(), isValid);
|
||||||
this(System.currentTimeMillis());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private ForgettingCurveParams(long now) {
|
private ForgettingCurveParams(long now, boolean isValid) {
|
||||||
this((int)pushCount((byte)0, false), now, now);
|
this((int)pushCount((byte)0, isValid), now, now, isValid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** This constructor is called when the user history bigram dictionary is being restored. */
|
||||||
public ForgettingCurveParams(int fc, long now, long last) {
|
public ForgettingCurveParams(int fc, long now, long last) {
|
||||||
|
// All words with level >= 1 had been saved.
|
||||||
|
// Invalid words with level == 0 had been saved.
|
||||||
|
// Valid words with level == 0 had *not* been saved.
|
||||||
|
this(fc, now, last, fcToLevel((byte)fc) > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ForgettingCurveParams(int fc, long now, long last, boolean isValid) {
|
||||||
|
mIsValid = isValid;
|
||||||
mFc = (byte)fc;
|
mFc = (byte)fc;
|
||||||
mLastTouchedTime = last;
|
mLastTouchedTime = last;
|
||||||
updateElapsedTime(now);
|
updateElapsedTime(now);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the validity flag stored in the final field {@code mIsValid}.
 *
 * @return the value of {@code mIsValid}
 */
public boolean isValid() {
|
||||||
|
return mIsValid;
|
||||||
|
}
|
||||||
|
|
||||||
public byte getFc() {
|
public byte getFc() {
|
||||||
updateElapsedTime(System.currentTimeMillis());
|
updateElapsedTime(System.currentTimeMillis());
|
||||||
return mFc;
|
return mFc;
|
||||||
|
|
Loading…
Reference in a new issue