Handle overflow properly in multiplyRate
Bug: 3401513 Change-Id: I8dd2523caa58bb51c378a01e160a58f9106ce9b8
This commit is contained in:
parent
1123bcaf2e
commit
b2e5e5937c
2 changed files with 48 additions and 19 deletions
|
@ -48,6 +48,7 @@ public class Utils {
|
||||||
private static final String TAG = Utils.class.getSimpleName();
|
private static final String TAG = Utils.class.getSimpleName();
|
||||||
private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
|
private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
|
||||||
private static boolean DBG = LatinImeLogger.sDBG;
|
private static boolean DBG = LatinImeLogger.sDBG;
|
||||||
|
private static boolean DBG_EDIT_DISTANCE = false;
|
||||||
|
|
||||||
private Utils() {
|
private Utils() {
|
||||||
// Intentional empty constructor for utility class.
|
// Intentional empty constructor for utility class.
|
||||||
|
@ -289,7 +290,7 @@ public class Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (LatinImeLogger.sDBG) {
|
if (DBG_EDIT_DISTANCE) {
|
||||||
Log.d(TAG, "editDistance:" + s + "," + t);
|
Log.d(TAG, "editDistance:" + s + "," + t);
|
||||||
for (int i = 0; i < dp.length; ++i) {
|
for (int i = 0; i < dp.length; ++i) {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
|
@ -338,6 +339,7 @@ public class Utils {
|
||||||
private static final int MAX_INITIAL_SCORE = 255;
|
private static final int MAX_INITIAL_SCORE = 255;
|
||||||
private static final int TYPED_LETTER_MULTIPLIER = 2;
|
private static final int TYPED_LETTER_MULTIPLIER = 2;
|
||||||
private static final int FULL_WORD_MULTIPLIER = 2;
|
private static final int FULL_WORD_MULTIPLIER = 2;
|
||||||
|
private static final int S_INT_MAX = 2147483647;
|
||||||
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
||||||
final int beforeLength = before.length();
|
final int beforeLength = before.length();
|
||||||
final int afterLength = after.length();
|
final int afterLength = after.length();
|
||||||
|
@ -352,7 +354,7 @@ public class Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (spaceCount == afterLength) return 0;
|
if (spaceCount == afterLength) return 0;
|
||||||
final double maximumScore = MAX_INITIAL_SCORE
|
final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
|
||||||
* Math.pow(
|
* Math.pow(
|
||||||
TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
|
TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
|
||||||
* FULL_WORD_MULTIPLIER;
|
* FULL_WORD_MULTIPLIER;
|
||||||
|
|
|
@ -300,7 +300,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
char s[length + 1];
|
char s[length + 1];
|
||||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
for (int i = 0; i <= length; i++) s[i] = word[i];
|
||||||
LOGI("Added word = %s, freq = %d", s, frequency);
|
LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
|
||||||
}
|
}
|
||||||
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
|
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
|
||||||
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
|
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
|
||||||
|
@ -409,11 +409,44 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static void multiplyRate(const int rate, int *freq) {
|
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
|
||||||
if (rate > 1000000) {
|
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
|
||||||
*freq = (*freq / 100) * rate;
|
return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
|
||||||
|
inline static void multiplyIntCapped(const int multiplier, int *base) {
|
||||||
|
const int temp = *base;
|
||||||
|
if (temp != S_INT_MAX) {
|
||||||
|
// Branch if multiplier == 2 for the optimization
|
||||||
|
if (multiplier == 2) {
|
||||||
|
*base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
|
||||||
} else {
|
} else {
|
||||||
*freq = *freq * rate / 100;
|
const int tempRetval = temp * multiplier;
|
||||||
|
*base = tempRetval >= temp ? tempRetval : S_INT_MAX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static int powerIntCapped(const int base, const int n) {
|
||||||
|
if (false && base == 2) {
|
||||||
|
return n < 31 ? 1 << n : S_INT_MAX;
|
||||||
|
} else {
|
||||||
|
int ret = base;
|
||||||
|
for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void multiplyRate(const int rate, int *freq) {
|
||||||
|
if (*freq != S_INT_MAX) {
|
||||||
|
if (*freq > 1000000) {
|
||||||
|
*freq /= 100;
|
||||||
|
multiplyIntCapped(rate, freq);
|
||||||
|
} else {
|
||||||
|
multiplyIntCapped(rate, freq);
|
||||||
|
*freq /= 100;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -449,9 +482,7 @@ inline static int calcFreqForSplitTwoWords(
|
||||||
// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
|
// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
|
||||||
// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
|
// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
|
||||||
|
|
||||||
for (int i = 0; i < totalLength; ++i) {
|
multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
|
||||||
totalFreq *= typedLetterMultiplier;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is another workaround to offset the demotion which will be done in
|
// This is another workaround to offset the demotion which will be done in
|
||||||
// calcNormalizedScore in Utils.java.
|
// calcNormalizedScore in Utils.java.
|
||||||
|
@ -499,7 +530,7 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
|
||||||
int pairFreq = calcFreqForSplitTwoWords(
|
int pairFreq = calcFreqForSplitTwoWords(
|
||||||
TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq);
|
TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq);
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("Missing space: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
|
LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
|
||||||
TYPED_LETTER_MULTIPLIER);
|
TYPED_LETTER_MULTIPLIER);
|
||||||
}
|
}
|
||||||
addWord(word, newWordLength, pairFreq);
|
addWord(word, newWordLength, pairFreq);
|
||||||
|
@ -559,10 +590,6 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
|
|
||||||
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
|
|
||||||
return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
|
|
||||||
}
|
|
||||||
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
|
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const int freq, const bool sameLength) const {
|
const int freq, const bool sameLength) const {
|
||||||
|
@ -591,7 +618,7 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
||||||
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
|
multiplyIntCapped(powerIntCapped(TYPED_LETTER_MULTIPLIER, depth), &lengthFreq);
|
||||||
if (lengthFreq == matchWeight) {
|
if (lengthFreq == matchWeight) {
|
||||||
// Full exact match
|
// Full exact match
|
||||||
if (depth > 1) {
|
if (depth > 1) {
|
||||||
|
@ -608,13 +635,13 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("Found one proximity correction.");
|
LOGI("Found one proximity correction.");
|
||||||
}
|
}
|
||||||
finalFreq *= 2;
|
multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &finalFreq);
|
||||||
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("calc: %d, %d", depth, sameLength);
|
LOGI("calc: %d, %d", depth, sameLength);
|
||||||
}
|
}
|
||||||
if (sameLength) finalFreq *= FULL_WORD_MULTIPLIER;
|
if (sameLength) multiplyIntCapped(FULL_WORD_MULTIPLIER, &finalFreq);
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -767,7 +794,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
// If inputIndex is greater than mInputLength, that means there is no
|
// If inputIndex is greater than mInputLength, that means there is no
|
||||||
// proximity chars. So, we don't need to check proximity.
|
// proximity chars. So, we don't need to check proximity.
|
||||||
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||||
matchWeight = matchWeight * TYPED_LETTER_MULTIPLIER;
|
multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
|
||||||
}
|
}
|
||||||
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|
||||||
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
|
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
|
||||||
|
|
Loading…
Reference in a new issue