Merge "Demote a word with mistyped space and missing space according to the length of each word"
This commit is contained in:
commit
9fcd9384e8
1 changed files with 50 additions and 5 deletions
|
@ -417,6 +417,54 @@ inline static void multiplyRate(const int rate, int *freq) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline static int calcFreqForSplitTwoWords(
|
||||||
|
const int typedLetterMultiplier, const int firstWordLength,
|
||||||
|
const int secondWordLength, const int firstFreq, const int secondFreq) {
|
||||||
|
if (firstWordLength == 0 || secondWordLength == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
|
||||||
|
int tempFirstFreq = firstFreq;
|
||||||
|
multiplyRate(firstDemotionRate, &tempFirstFreq);
|
||||||
|
|
||||||
|
const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
|
||||||
|
int tempSecondFreq = secondFreq;
|
||||||
|
multiplyRate(secondDemotionRate, &tempSecondFreq);
|
||||||
|
|
||||||
|
const int totalLength = firstWordLength + secondWordLength;
|
||||||
|
|
||||||
|
// Promote pairFreq with multiplying by 2, because the word length is the same as the typed
|
||||||
|
// length.
|
||||||
|
int totalFreq = tempFirstFreq + tempSecondFreq;
|
||||||
|
|
||||||
|
// This is a workaround to try offsetting the not-enough-demotion which will be done in
|
||||||
|
// calcNormalizedScore in Utils.java.
|
||||||
|
// In calcNormalizedScore the score will be demoted by (1 - 1 / length)
|
||||||
|
// but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
|
||||||
|
// (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
|
||||||
|
const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
|
||||||
|
multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
|
||||||
|
|
||||||
|
// At this moment, totalFreq is calculated by the following formula:
|
||||||
|
// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
|
||||||
|
// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
|
||||||
|
|
||||||
|
for (int i = 0; i < totalLength; ++i) {
|
||||||
|
totalFreq *= typedLetterMultiplier;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is another workaround to offset the demotion which will be done in
|
||||||
|
// calcNormalizedScore in Utils.java.
|
||||||
|
// In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
|
||||||
|
// the same amount because we already have adjusted the synthetic freq of this "missing or
|
||||||
|
// mistyped space" suggestion candidate above in this method.
|
||||||
|
const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
|
||||||
|
multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
|
||||||
|
|
||||||
|
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
|
||||||
|
return totalFreq;
|
||||||
|
}
|
||||||
|
|
||||||
bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
|
bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
|
||||||
const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
|
const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
|
||||||
const int secondWordLength) {
|
const int secondWordLength) {
|
||||||
|
@ -448,15 +496,12 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
|
||||||
word[i] = mWord[i - firstWordLength - 1];
|
word[i] = mWord[i - firstWordLength - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Promote pairFreq with multiplying by 2, because the word length is the same as the typed
|
int pairFreq = calcFreqForSplitTwoWords(
|
||||||
// length.
|
TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq);
|
||||||
int pairFreq = firstFreq + secondFreq;
|
|
||||||
for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
|
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("Missing space: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
|
LOGI("Missing space: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
|
||||||
TYPED_LETTER_MULTIPLIER);
|
TYPED_LETTER_MULTIPLIER);
|
||||||
}
|
}
|
||||||
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &pairFreq);
|
|
||||||
addWord(word, newWordLength, pairFreq);
|
addWord(word, newWordLength, pairFreq);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue