Add Utils.equalsIgnoreCase methods

This change also corrects usage of "frequency", "priority" and "score"
* Frequency is the relative probability in dictionary.
* Score is the relative probability in suggestions.
* Priority is kind a sorted score.

Change-Id: Iafb135a4ecdb401cc505014a07c74dfcac44d699
main
Tadashi G. Takaoka 2011-03-15 11:46:32 -07:00
parent 027992afdc
commit e7a2512aa3
7 changed files with 134 additions and 86 deletions

View File

@ -18,3 +18,7 @@
-keep class com.android.inputmethod.latin.AutoCorrection {
java.lang.CharSequence getAutoCorrectionWord();
}
-keep class com.android.inputmethod.latin.Utils {
boolean equalsIgnoreCase(...);
}

View File

@ -48,7 +48,7 @@ public class AutoCorrection {
}
public void updateAutoCorrectionStatus(Map<String, Dictionary> dictionaries,
WordComposer wordComposer, ArrayList<CharSequence> suggestions, int[] priorities,
WordComposer wordComposer, ArrayList<CharSequence> suggestions, int[] sortedScores,
CharSequence typedWord, double autoCorrectionThreshold, int correctionMode,
CharSequence quickFixedWord, CharSequence whitelistedWord) {
if (hasAutoCorrectionForWhitelistedWord(whitelistedWord)) {
@ -62,7 +62,7 @@ public class AutoCorrection {
mHasAutoCorrection = true;
mAutoCorrectionWord = quickFixedWord;
} else if (hasAutoCorrectionForBinaryDictionary(wordComposer, suggestions, correctionMode,
priorities, typedWord, autoCorrectionThreshold)) {
sortedScores, typedWord, autoCorrectionThreshold)) {
mHasAutoCorrection = true;
mAutoCorrectionWord = suggestions.get(0);
}
@ -114,13 +114,13 @@ public class AutoCorrection {
}
private boolean hasAutoCorrectionForBinaryDictionary(WordComposer wordComposer,
ArrayList<CharSequence> suggestions, int correctionMode, int[] priorities,
ArrayList<CharSequence> suggestions, int correctionMode, int[] sortedScores,
CharSequence typedWord, double autoCorrectionThreshold) {
if (wordComposer.size() > 1 && (correctionMode == Suggest.CORRECTION_FULL
|| correctionMode == Suggest.CORRECTION_FULL_BIGRAM)
&& typedWord != null && suggestions.size() > 0 && priorities.length > 0) {
&& typedWord != null && suggestions.size() > 0 && sortedScores.length > 0) {
final CharSequence autoCorrectionCandidate = suggestions.get(0);
final int autoCorrectionCandidateScore = priorities[0];
final int autoCorrectionCandidateScore = sortedScores[0];
// TODO: when the normalized score of the first suggestion is nearly equals to
// the normalized score of the second suggestion, behave less aggressive.
mNormalizedScore = Utils.calcNormalizedScore(

View File

@ -54,8 +54,8 @@ public class BinaryDictionary extends Dictionary {
private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_PROXIMITY_CHARS_SIZE];
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
private final int[] mFrequencies = new int[MAX_WORDS];
private final int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
private final int[] mScores = new int[MAX_WORDS];
private final int[] mBigramScores = new int[MAX_BIGRAMS];
private final KeyboardSwitcher mKeyboardSwitcher = KeyboardSwitcher.getInstance();
private final SubtypeSwitcher mSubtypeSwitcher = SubtypeSwitcher.getInstance();
@ -149,14 +149,14 @@ public class BinaryDictionary extends Dictionary {
private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
private native int getSuggestionsNative(int dict, int proximityInfo, int[] xCoordinates,
int[] yCoordinates, int[] inputCodes, int codesSize, int flags, char[] outputChars,
int[] frequencies);
int[] scores);
private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength,
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] frequencies,
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
int maxWordLength, int maxBigrams, int maxAlternatives);
private final void loadDictionary(String path, long startOffset, long length) {
mNativeDict = openNative(path, startOffset, length,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
TYPED_LETTER_MULTIPLIER, FULL_WORD_SCORE_MULTIPLIER,
MAX_WORD_LENGTH, MAX_WORDS, MAX_PROXIMITY_CHARS_SIZE);
mDictLength = length;
}
@ -168,7 +168,7 @@ public class BinaryDictionary extends Dictionary {
char[] chars = previousWord.toString().toCharArray();
Arrays.fill(mOutputChars_bigrams, (char) 0);
Arrays.fill(mFrequencies_bigrams, 0);
Arrays.fill(mBigramScores, 0);
int codesSize = codes.size();
Arrays.fill(mInputCodes, -1);
@ -177,18 +177,18 @@ public class BinaryDictionary extends Dictionary {
Math.min(alternatives.length, MAX_PROXIMITY_CHARS_SIZE));
int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
mOutputChars_bigrams, mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS,
mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS,
MAX_PROXIMITY_CHARS_SIZE);
for (int j = 0; j < count; ++j) {
if (mFrequencies_bigrams[j] < 1) break;
if (mBigramScores[j] < 1) break;
final int start = j * MAX_WORD_LENGTH;
int len = 0;
while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) {
++len;
}
if (len > 0) {
callback.addWord(mOutputChars_bigrams, start, len, mFrequencies_bigrams[j],
callback.addWord(mOutputChars_bigrams, start, len, mBigramScores[j],
mDicTypeId, DataType.BIGRAM);
}
}
@ -197,17 +197,17 @@ public class BinaryDictionary extends Dictionary {
@Override
public void getWords(final WordComposer codes, final WordCallback callback) {
final int count = getSuggestions(codes, mKeyboardSwitcher.getLatinKeyboard(),
mOutputChars, mFrequencies);
mOutputChars, mScores);
for (int j = 0; j < count; ++j) {
if (mFrequencies[j] < 1) break;
if (mScores[j] < 1) break;
final int start = j * MAX_WORD_LENGTH;
int len = 0;
while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) {
++len;
}
if (len > 0) {
callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId,
callback.addWord(mOutputChars, start, len, mScores[j], mDicTypeId,
DataType.UNIGRAM);
}
}
@ -218,7 +218,7 @@ public class BinaryDictionary extends Dictionary {
}
/* package for test */ int getSuggestions(final WordComposer codes, final Keyboard keyboard,
char[] outputChars, int[] frequencies) {
char[] outputChars, int[] scores) {
if (!isValidDictionary()) return -1;
final int codesSize = codes.size();
@ -232,12 +232,12 @@ public class BinaryDictionary extends Dictionary {
Math.min(alternatives.length, MAX_PROXIMITY_CHARS_SIZE));
}
Arrays.fill(outputChars, (char) 0);
Arrays.fill(frequencies, 0);
Arrays.fill(scores, 0);
return getSuggestionsNative(
mNativeDict, keyboard.getProximityInfo(),
codes.getXCoordinates(), codes.getYCoordinates(), mInputCodes, codesSize,
mFlags, outputChars, frequencies);
mFlags, outputChars, scores);
}
@Override

View File

@ -29,7 +29,7 @@ public abstract class Dictionary {
/**
* The weight to give to a word if it's length is the same as the number of typed characters.
*/
protected static final int FULL_WORD_FREQ_MULTIPLIER = 2;
protected static final int FULL_WORD_SCORE_MULTIPLIER = 2;
public static enum DataType {
UNIGRAM, BIGRAM
@ -42,17 +42,17 @@ public abstract class Dictionary {
public interface WordCallback {
/**
* Adds a word to a list of suggestions. The word is expected to be ordered based on
* the provided frequency.
* the provided score.
* @param word the character array containing the word
* @param wordOffset starting offset of the word in the character array
* @param wordLength length of valid characters in the character array
* @param frequency the frequency of occurrence. This is normalized between 1 and 255, but
* @param score the score of occurrence. This is normalized between 1 and 255, but
* can exceed those limits
* @param dicTypeId of the dictionary where word was from
* @param dataType tells type of this data
* @return true if the word was added, false if no more words are required
*/
boolean addWord(char[] word, int wordOffset, int wordLength, int frequency, int dicTypeId,
boolean addWord(char[] word, int wordOffset, int wordLength, int score, int dicTypeId,
DataType dataType);
}

View File

@ -327,7 +327,7 @@ public class ExpandableDictionary extends Dictionary {
final int finalFreq;
if (skipPos < 0) {
finalFreq = freq * snr * addedAttenuation
* FULL_WORD_FREQ_MULTIPLIER;
* FULL_WORD_SCORE_MULTIPLIER;
} else {
finalFreq = computeSkippedWordFinalFreq(freq,
snr * addedAttenuation, mInputLength);

View File

@ -47,7 +47,7 @@ public class Suggest implements Dictionary.WordCallback {
/**
* Words that appear in both bigram and unigram data gets multiplier ranging from
* BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from
* BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the score from
* bigram data.
*/
public static final double BIGRAM_MULTIPLIER_MIN = 1.2;
@ -92,13 +92,13 @@ public class Suggest implements Dictionary.WordCallback {
private boolean mQuickFixesEnabled;
private double mAutoCorrectionThreshold;
private int[] mPriorities = new int[mPrefMaxSuggestions];
private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS];
private int[] mScores = new int[mPrefMaxSuggestions];
private int[] mBigramScores = new int[PREF_MAX_BIGRAMS];
private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
private String mLowerOriginalWord;
private CharSequence mTypedWord;
// TODO: Remove these member variables by passing more context to addWord() callback method
private boolean mIsFirstCharCapitalized;
@ -207,8 +207,8 @@ public class Suggest implements Dictionary.WordCallback {
throw new IllegalArgumentException("maxSuggestions must be between 1 and 100");
}
mPrefMaxSuggestions = maxSuggestions;
mPriorities = new int[mPrefMaxSuggestions];
mBigramPriorities = new int[PREF_MAX_BIGRAMS];
mScores = new int[mPrefMaxSuggestions];
mBigramScores = new int[PREF_MAX_BIGRAMS];
collectGarbage(mSuggestions, mPrefMaxSuggestions);
while (mStringPool.size() < mPrefMaxSuggestions) {
StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
@ -256,25 +256,23 @@ public class Suggest implements Dictionary.WordCallback {
mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
mIsAllUpperCase = wordComposer.isAllUpperCase();
collectGarbage(mSuggestions, mPrefMaxSuggestions);
Arrays.fill(mPriorities, 0);
Arrays.fill(mScores, 0);
// Save a lowercase version of the original word
CharSequence typedWord = wordComposer.getTypedWord();
if (typedWord != null) {
final String typedWordString = typedWord.toString();
typedWord = typedWordString;
mLowerOriginalWord = typedWordString.toLowerCase();
// Treating USER_TYPED as UNIGRAM suggestion for logging now.
LatinImeLogger.onAddSuggestedWord(typedWordString, Suggest.DIC_USER_TYPED,
Dictionary.DataType.UNIGRAM);
} else {
mLowerOriginalWord = "";
}
mTypedWord = typedWord;
if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM
|| mCorrectionMode == CORRECTION_BASIC)) {
// At first character typed, search only the bigrams
Arrays.fill(mBigramPriorities, 0);
Arrays.fill(mBigramScores, 0);
collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS);
if (!TextUtils.isEmpty(prevWordForBigram)) {
@ -346,7 +344,7 @@ public class Suggest implements Dictionary.WordCallback {
mWhiteListDictionary.getWhiteListedWord(typedWordString));
mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries, wordComposer,
mSuggestions, mPriorities, typedWord, mAutoCorrectionThreshold, mCorrectionMode,
mSuggestions, mScores, typedWord, mAutoCorrectionThreshold, mCorrectionMode,
autoText, whitelistedWord);
if (autoText != null) {
@ -364,26 +362,25 @@ public class Suggest implements Dictionary.WordCallback {
if (DBG) {
double normalizedScore = mAutoCorrection.getNormalizedScore();
ArrayList<SuggestedWords.SuggestedWordInfo> frequencyInfoList =
ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList =
new ArrayList<SuggestedWords.SuggestedWordInfo>();
frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
final int priorityLength = mPriorities.length;
for (int i = 0; i < priorityLength; ++i) {
scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
for (int i = 0; i < mScores.length; ++i) {
if (normalizedScore > 0) {
final String priorityThreshold = Integer.toString(mPriorities[i]) + " (" +
final String scoreThreshold = Integer.toString(mScores[i]) + " (" +
normalizedScore + ")";
frequencyInfoList.add(
new SuggestedWords.SuggestedWordInfo(priorityThreshold, false));
scoreInfoList.add(
new SuggestedWords.SuggestedWordInfo(scoreThreshold, false));
normalizedScore = 0.0;
} else {
final String priority = Integer.toString(mPriorities[i]);
frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo(priority, false));
final String score = Integer.toString(mScores[i]);
scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(score, false));
}
}
for (int i = priorityLength; i < mSuggestions.size(); ++i) {
frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
for (int i = mScores.length; i < mSuggestions.size(); ++i) {
scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
}
return new SuggestedWords.Builder().addWords(mSuggestions, frequencyInfoList);
return new SuggestedWords.Builder().addWords(mSuggestions, scoreInfoList);
}
return new SuggestedWords.Builder().addWords(mSuggestions, null);
}
@ -419,52 +416,37 @@ public class Suggest implements Dictionary.WordCallback {
return mAutoCorrection.hasAutoCorrection();
}
private static boolean compareCaseInsensitive(final String lowerOriginalWord,
final char[] word, final int offset, final int length) {
final int originalLength = lowerOriginalWord.length();
if (originalLength == length && Character.isUpperCase(word[offset])) {
for (int i = 0; i < originalLength; i++) {
if (lowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
return false;
}
}
return true;
}
return false;
}
@Override
public boolean addWord(final char[] word, final int offset, final int length, int freq,
public boolean addWord(final char[] word, final int offset, final int length, int score,
final int dicTypeId, final Dictionary.DataType dataType) {
Dictionary.DataType dataTypeForLog = dataType;
ArrayList<CharSequence> suggestions;
int[] priorities;
int prefMaxSuggestions;
final ArrayList<CharSequence> suggestions;
final int[] sortedScores;
final int prefMaxSuggestions;
if(dataType == Dictionary.DataType.BIGRAM) {
suggestions = mBigramSuggestions;
priorities = mBigramPriorities;
sortedScores = mBigramScores;
prefMaxSuggestions = PREF_MAX_BIGRAMS;
} else {
suggestions = mSuggestions;
priorities = mPriorities;
sortedScores = mScores;
prefMaxSuggestions = mPrefMaxSuggestions;
}
int pos = 0;
// Check if it's the same word, only caps are different
if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
if (Utils.equalsIgnoreCase(mTypedWord, word, offset, length)) {
// TODO: remove this surrounding if clause and move this logic to
// getSuggestedWordBuilder.
if (suggestions.size() > 0) {
final String currentHighestWordLowerCase =
suggestions.get(0).toString().toLowerCase();
final String currentHighestWord = suggestions.get(0).toString();
// If the current highest word is also equal to typed word, we need to compare
// frequency to determine the insertion position. This does not ensure strictly
// correct ordering, but ensures the top score is on top which is enough for
// removing duplicates correctly.
if (compareCaseInsensitive(currentHighestWordLowerCase, word, offset, length)
&& freq <= priorities[0]) {
if (Utils.equalsIgnoreCase(currentHighestWord, word, offset, length)
&& score <= sortedScores[0]) {
pos = 1;
}
}
@ -475,24 +457,24 @@ public class Suggest implements Dictionary.WordCallback {
if(bigramSuggestion >= 0) {
dataTypeForLog = Dictionary.DataType.BIGRAM;
// turn freq from bigram into multiplier specified above
double multiplier = (((double) mBigramPriorities[bigramSuggestion])
double multiplier = (((double) mBigramScores[bigramSuggestion])
/ MAXIMUM_BIGRAM_FREQUENCY)
* (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
+ BIGRAM_MULTIPLIER_MIN;
/* Log.d(TAG,"bigram num: " + bigramSuggestion
+ " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
+ " currentPriority: " + freq + " bigramPriority: "
+ mBigramPriorities[bigramSuggestion]
+ " currentScore: " + score + " bigramScore: "
+ mBigramScores[bigramSuggestion]
+ " multiplier: " + multiplier); */
freq = (int)Math.round((freq * multiplier));
score = (int)Math.round((score * multiplier));
}
}
// Check the last one's priority and bail
if (priorities[prefMaxSuggestions - 1] >= freq) return true;
// Check the last one's score and bail
if (sortedScores[prefMaxSuggestions - 1] >= score) return true;
while (pos < prefMaxSuggestions) {
if (priorities[pos] < freq
|| (priorities[pos] == freq && length < suggestions.get(pos).length())) {
if (sortedScores[pos] < score
|| (sortedScores[pos] == score && length < suggestions.get(pos).length())) {
break;
}
pos++;
@ -502,8 +484,8 @@ public class Suggest implements Dictionary.WordCallback {
return true;
}
System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1);
priorities[pos] = freq;
System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1);
sortedScores[pos] = score;
int poolSize = mStringPool.size();
StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
: new StringBuilder(getApproxMaxWordLength());

View File

@ -285,7 +285,7 @@ public class Utils {
// In dictionary.cpp, getSuggestion() method,
// suggestion scores are computed using the below formula.
// original score (called 'frequency')
// original score
// := pow(mTypedLetterMultiplier (this is defined 2),
// (the number of matched characters between typed word and suggested word))
// * (individual word's score which defined in the unigram dictionary,
@ -295,7 +295,7 @@ public class Utils {
// (full match up to min(before.length(), after.length())
// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
// - If the word is a true full match except for differences in accents or
// capitalization, then treat it as if the frequency was 255.
// capitalization, then treat it as if the score was 255.
// - If before.length() == after.length()
// => multiply by mFullWordMultiplier (this is defined 2))
// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
@ -561,4 +561,66 @@ public class Utils {
Log.e(TAG, "Could not load native library jni_latinime");
}
}
/**
* Returns true if a and b are equal ignoring the case of the character.
* @param a first character to check
* @param b second character to check
* @return {@code true} if a and b are equal, {@code false} otherwise.
*/
public static boolean equalsIgnoreCase(char a, char b) {
// Some language, such as Turkish, need testing both cases.
return a == b
|| Character.toLowerCase(a) == Character.toLowerCase(b)
|| Character.toUpperCase(a) == Character.toUpperCase(b);
}
/**
* Returns true if a and b are equal ignoring the case of the characters, including if they are
* both null.
* @param a first CharSequence to check
* @param b second CharSequence to check
* @return {@code true} if a and b are equal, {@code false} otherwise.
*/
public static boolean equalsIgnoreCase(CharSequence a, CharSequence b) {
if (a == b)
return true; // including both a and b are null.
if (a == null || b == null)
return false;
final int length = a.length();
if (length != b.length())
return false;
for (int i = 0; i < length; i++) {
if (!equalsIgnoreCase(a.charAt(i), b.charAt(i)))
return false;
}
return true;
}
/**
* Returns true if a and b are equal ignoring the case of the characters, including if a is null
* and b is zero length.
* @param a CharSequence to check
* @param b character array to check
* @param offset start offset of array b
* @param length length of characters in array b
* @return {@code true} if a and b are equal, {@code false} otherwise.
* @throws IndexOutOfBoundsException
* if {@code offset < 0 || length < 0 || offset + length > data.length}.
* @throws NullPointerException if {@code b == null}.
*/
public static boolean equalsIgnoreCase(CharSequence a, char[] b, int offset, int length) {
if (offset < 0 || length < 0 || length > b.length - offset)
throw new IndexOutOfBoundsException("array.length=" + b.length + " offset=" + offset
+ " length=" + length);
if (a == null)
return length == 0; // including a is null and b is zero length.
if (a.length() != length)
return false;
for (int i = 0; i < length; i++) {
if (!equalsIgnoreCase(a.charAt(i), b[offset + i]))
return false;
}
return true;
}
}