Handle overflow properly in multiplyRate

Bug: 3401513
Change-Id: I8dd2523caa58bb51c378a01e160a58f9106ce9b8
This commit is contained in:
satok 2011-04-26 14:50:54 +09:00
parent 1123bcaf2e
commit b2e5e5937c
2 changed files with 48 additions and 19 deletions

View file

@ -48,6 +48,7 @@ public class Utils {
private static final String TAG = Utils.class.getSimpleName(); private static final String TAG = Utils.class.getSimpleName();
private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
private static boolean DBG = LatinImeLogger.sDBG; private static boolean DBG = LatinImeLogger.sDBG;
private static boolean DBG_EDIT_DISTANCE = false;
private Utils() { private Utils() {
// Intentional empty constructor for utility class. // Intentional empty constructor for utility class.
@ -289,7 +290,7 @@ public class Utils {
} }
} }
} }
if (LatinImeLogger.sDBG) { if (DBG_EDIT_DISTANCE) {
Log.d(TAG, "editDistance:" + s + "," + t); Log.d(TAG, "editDistance:" + s + "," + t);
for (int i = 0; i < dp.length; ++i) { for (int i = 0; i < dp.length; ++i) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
@ -338,6 +339,7 @@ public class Utils {
private static final int MAX_INITIAL_SCORE = 255; private static final int MAX_INITIAL_SCORE = 255;
private static final int TYPED_LETTER_MULTIPLIER = 2; private static final int TYPED_LETTER_MULTIPLIER = 2;
private static final int FULL_WORD_MULTIPLIER = 2; private static final int FULL_WORD_MULTIPLIER = 2;
private static final int S_INT_MAX = 2147483647;
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) { public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
final int beforeLength = before.length(); final int beforeLength = before.length();
final int afterLength = after.length(); final int afterLength = after.length();
@ -352,7 +354,7 @@ public class Utils {
} }
} }
if (spaceCount == afterLength) return 0; if (spaceCount == afterLength) return 0;
final double maximumScore = MAX_INITIAL_SCORE final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
* Math.pow( * Math.pow(
TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount)) TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
* FULL_WORD_MULTIPLIER; * FULL_WORD_MULTIPLIER;

View file

@ -300,7 +300,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
if (DEBUG_DICT) { if (DEBUG_DICT) {
char s[length + 1]; char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i]; for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Added word = %s, freq = %d", s, frequency); LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
} }
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
@ -409,11 +409,44 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
} }
} }
inline static void multiplyRate(const int rate, int *freq) { static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
if (rate > 1000000) { static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
*freq = (*freq / 100) * rate; return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
}
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
// Multiplies *base by multiplier in place, clamping the result to S_INT_MAX.
// A value that already equals S_INT_MAX is treated as saturated and left untouched.
inline static void multiplyIntCapped(const int multiplier, int *base) {
const int temp = *base;
if (temp != S_INT_MAX) {
// Branch if multiplier == 2 for the optimization
if (multiplier == 2) {
// Doubling is only performed while temp <= S_INT_MAX / 2; otherwise clamp.
*base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
} else { } else {
// NOTE(review): overflow is inferred from the product being smaller than temp.
// Signed overflow is undefined behavior in C++, and a wrapped product can still
// be >= temp, so this test is not reliable -- consider comparing temp against
// S_INT_MAX / multiplier before multiplying.
*freq = *freq * rate / 100; const int tempRetval = temp * multiplier;
*base = tempRetval >= temp ? tempRetval : S_INT_MAX;
}
}
}
// Returns base raised to the n-th power, clamped to S_INT_MAX.
//
// base: the factor to multiply repeatedly.
// n:    the exponent. For n <= 0 the result is 1, the multiplicative identity, so
//       that feeding the result to multiplyIntCapped() leaves the other operand
//       unchanged -- the same outcome as a "multiply n times" loop that runs zero
//       times. (Previously n == 0 returned base, i.e. one multiplication too many.)
inline static int powerIntCapped(const int base, const int n) {
    if (n <= 0) {
        return 1;
    }
    if (base == 2) {
        // 1 << n fits in a positive int only for n < 31; beyond that, clamp.
        return n < 31 ? 1 << n : S_INT_MAX;
    }
    // ret already holds base^1, so n - 1 further capped multiplications are needed.
    int ret = base;
    for (int i = 1; i < n; ++i) {
        multiplyIntCapped(base, &ret);
    }
    return ret;
}
// Scales *freq in place by rate / 100 (rate is applied as a percentage), using
// multiplyIntCapped() for the multiplication step.
// A *freq that already equals S_INT_MAX is left unchanged.
inline static void multiplyRate(const int rate, int *freq) {
if (*freq != S_INT_MAX) {
if (*freq > 1000000) {
// Large values: divide before multiplying (presumably to leave headroom
// for the multiplication).
*freq /= 100;
multiplyIntCapped(rate, freq);
} else {
// Smaller values: multiply first, then divide, so the integer division
// is applied to the full product.
multiplyIntCapped(rate, freq);
*freq /= 100;
}
} }
} }
@ -449,9 +482,7 @@ inline static int calcFreqForSplitTwoWords(
// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1))) // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1)) // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
for (int i = 0; i < totalLength; ++i) { multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
totalFreq *= typedLetterMultiplier;
}
// This is another workaround to offset the demotion which will be done in // This is another workaround to offset the demotion which will be done in
// calcNormalizedScore in Utils.java. // calcNormalizedScore in Utils.java.
@ -499,7 +530,7 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
int pairFreq = calcFreqForSplitTwoWords( int pairFreq = calcFreqForSplitTwoWords(
TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq); TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq);
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("Missing space: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength, LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
TYPED_LETTER_MULTIPLIER); TYPED_LETTER_MULTIPLIER);
} }
addWord(word, newWordLength, pairFreq); addWord(word, newWordLength, pairFreq);
@ -559,10 +590,6 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
} }
} }
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
// Returns num * 255, or S_INT_MAX when num is at or above TWO_31ST_DIV_255.
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
    if (num >= TWO_31ST_DIV_255) {
        return S_INT_MAX;
    }
    return 255 * num;
}
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth, inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos, const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
const int freq, const bool sameLength) const { const int freq, const bool sameLength) const {
@ -591,7 +618,7 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
} }
} }
int lengthFreq = TYPED_LETTER_MULTIPLIER; int lengthFreq = TYPED_LETTER_MULTIPLIER;
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER; multiplyIntCapped(powerIntCapped(TYPED_LETTER_MULTIPLIER, depth), &lengthFreq);
if (lengthFreq == matchWeight) { if (lengthFreq == matchWeight) {
// Full exact match // Full exact match
if (depth > 1) { if (depth > 1) {
@ -608,13 +635,13 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("Found one proximity correction."); LOGI("Found one proximity correction.");
} }
finalFreq *= 2; multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &finalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
} }
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("calc: %d, %d", depth, sameLength); LOGI("calc: %d, %d", depth, sameLength);
} }
if (sameLength) finalFreq *= FULL_WORD_MULTIPLIER; if (sameLength) multiplyIntCapped(FULL_WORD_MULTIPLIER, &finalFreq);
return finalFreq; return finalFreq;
} }
@ -767,7 +794,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
// If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity. // proximity chars. So, we don't need to check proximity.
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
matchWeight = matchWeight * TYPED_LETTER_MULTIPLIER; multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
} }
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);