Merge "Combine the skipped and transposed correction"
commit
f77009ac3a
|
@ -190,15 +190,15 @@ void Correction::startToTraverseAllNodes() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Correction::needsToPrune() const {
|
bool Correction::needsToPrune() const {
|
||||||
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
|
||||||
|| mProximityCount > mMaxEditDistance);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: inline?
|
||||||
Correction::CorrectionType Correction::processSkipChar(
|
Correction::CorrectionType Correction::processSkipChar(
|
||||||
const int32_t c, const bool isTerminal) {
|
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
|
||||||
mWord[mOutputIndex] = c;
|
mWord[mOutputIndex] = c;
|
||||||
if (needsToTraverseAllNodes() && isTerminal) {
|
if (needsToTraverseAllNodes() && isTerminal) {
|
||||||
mTerminalInputIndex = mInputIndex;
|
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
|
||||||
mTerminalOutputIndex = mOutputIndex;
|
mTerminalOutputIndex = mOutputIndex;
|
||||||
incrementOutputIndex();
|
incrementOutputIndex();
|
||||||
return TRAVERSE_ALL_ON_TERMINAL;
|
return TRAVERSE_ALL_ON_TERMINAL;
|
||||||
|
@ -212,13 +212,22 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
const int32_t c, const bool isTerminal) {
|
const int32_t c, const bool isTerminal) {
|
||||||
|
|
||||||
if (mNeedsToTraverseAllNodes || isQuote(c)) {
|
if (mNeedsToTraverseAllNodes || isQuote(c)) {
|
||||||
if (mLastCharExceeded > 0 && mInputIndex == mInputLength - 1
|
bool incremented = false;
|
||||||
&& mProximityInfo->getMatchedProximityId(mInputIndex, c, false)
|
if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
|
||||||
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
// TODO: Do not check the proximity if EditDistance exceeds the threshold
|
||||||
|
const int matchId = mProximityInfo->getMatchedProximityId(mInputIndex, c, true);
|
||||||
|
if (matchId == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||||
mLastCharExceeded = false;
|
mLastCharExceeded = false;
|
||||||
--mExcessiveCount;
|
--mExcessiveCount;
|
||||||
|
} else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
|
||||||
|
mLastCharExceeded = false;
|
||||||
|
--mExcessiveCount;
|
||||||
|
++mProximityCount;
|
||||||
}
|
}
|
||||||
return processSkipChar(c, isTerminal);
|
incrementInputIndex();
|
||||||
|
incremented = true;
|
||||||
|
}
|
||||||
|
return processSkipChar(c, isTerminal, incremented);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mExcessivePos >= 0) {
|
if (mExcessivePos >= 0) {
|
||||||
|
@ -258,22 +267,67 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
} else if (mCorrectionStates[mOutputIndex].mExceeding) {
|
} else if (mCorrectionStates[mOutputIndex].mExceeding) {
|
||||||
--mTransposedCount;
|
--mTransposedCount;
|
||||||
++mExcessiveCount;
|
++mExcessiveCount;
|
||||||
|
--mExcessivePos;
|
||||||
incrementInputIndex();
|
incrementInputIndex();
|
||||||
} else {
|
} else {
|
||||||
--mTransposedCount;
|
--mTransposedCount;
|
||||||
|
if (DEBUG_CORRECTION) {
|
||||||
|
DUMP_WORD(mWord, mOutputIndex);
|
||||||
|
LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
|
||||||
|
mTransposedCount, mExcessiveCount, c);
|
||||||
|
}
|
||||||
return UNRELATED;
|
return UNRELATED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool noCorrectionsHappenedSoFar =
|
||||||
|
(mSkippedCount + mExcessiveCount + mTransposedCount) == 0;
|
||||||
// TODO: sum counters
|
// TODO: sum counters
|
||||||
const bool checkProximityChars =
|
const bool checkProximityChars = noCorrectionsHappenedSoFar;
|
||||||
!(mSkippedCount > 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
|
|
||||||
const int matchedProximityCharId = secondTransposing
|
const int matchedProximityCharId = secondTransposing
|
||||||
? ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
|
? ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
|
||||||
: mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars);
|
: mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars);
|
||||||
|
|
||||||
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
||||||
if (mInputIndex - 1 < mInputLength && (mExceeding || mTransposing)
|
// TODO: Optimize
|
||||||
|
// As the current char turned out to be an unrelated char,
|
||||||
|
// we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
|
||||||
|
// here refers to the previous state.
|
||||||
|
if (noCorrectionsHappenedSoFar
|
||||||
|
&& mCorrectionStates[mOutputIndex].mProximityMatching
|
||||||
|
&& mCorrectionStates[mOutputIndex].mExceeding
|
||||||
|
&& mProximityInfo->getMatchedProximityId(mInputIndex, mWord[mOutputIndex], false)
|
||||||
|
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||||
|
// TODO: check transpose in the same way?
|
||||||
|
++mExcessiveCount;
|
||||||
|
--mProximityCount;
|
||||||
|
} else if (mInputIndex < mInputLength - 1 && mOutputIndex > 0
|
||||||
|
&& mTransposedCount > 0 && mExcessiveCount == 0
|
||||||
|
&& !mCorrectionStates[mOutputIndex].mTransposing
|
||||||
|
&& mCorrectionStates[mOutputIndex - 1].mTransposing
|
||||||
|
&& mProximityInfo->getMatchedProximityId(
|
||||||
|
mInputIndex, mWord[mOutputIndex - 1], false)
|
||||||
|
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
|
||||||
|
&& mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
|
||||||
|
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||||
|
// Example:
|
||||||
|
// occaisional -> occa sional
|
||||||
|
// mmmmttx -> mmmm(E)mmmmmm
|
||||||
|
mTransposedCount -= 2;
|
||||||
|
++mExcessiveCount;
|
||||||
|
++mInputIndex;
|
||||||
|
} else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0 && mSkippedCount == 0
|
||||||
|
&& !mCorrectionStates[mOutputIndex].mTransposing
|
||||||
|
&& mCorrectionStates[mOutputIndex - 1].mTransposing
|
||||||
|
&& mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
|
||||||
|
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||||
|
// Example:
|
||||||
|
// chcolate -> chocolate
|
||||||
|
// mmttx -> mmsmmmmmm
|
||||||
|
mTransposedCount -= 2;
|
||||||
|
++mSkippedCount;
|
||||||
|
--mInputIndex;
|
||||||
|
} else if (mInputIndex - 1 < mInputLength && (mExceeding || mTransposing)
|
||||||
&& mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
|
&& mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
|
||||||
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||||
if (mTransposing) {
|
if (mTransposing) {
|
||||||
|
@ -282,11 +336,11 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
++mExcessiveCount;
|
++mExcessiveCount;
|
||||||
incrementInputIndex();
|
incrementInputIndex();
|
||||||
}
|
}
|
||||||
} else if (mSkipping && mProximityCount == 0) {
|
} else if (mProximityCount == 0 && noCorrectionsHappenedSoFar) {
|
||||||
// Skip this letter and continue deeper
|
// Skip this letter and continue deeper
|
||||||
++mSkippedCount;
|
++mSkippedCount;
|
||||||
return processSkipChar(c, isTerminal);
|
return processSkipChar(c, isTerminal, false);
|
||||||
} else if (checkProximityChars
|
} else if (noCorrectionsHappenedSoFar
|
||||||
&& mInputIndex > 0
|
&& mInputIndex > 0
|
||||||
&& mCorrectionStates[mOutputIndex].mProximityMatching
|
&& mCorrectionStates[mOutputIndex].mProximityMatching
|
||||||
&& mCorrectionStates[mOutputIndex].mSkipping
|
&& mCorrectionStates[mOutputIndex].mSkipping
|
||||||
|
@ -296,8 +350,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
// proximity chars of "s", but it should rather be handled as a skipped char.
|
// proximity chars of "s", but it should rather be handled as a skipped char.
|
||||||
++mSkippedCount;
|
++mSkippedCount;
|
||||||
--mProximityCount;
|
--mProximityCount;
|
||||||
return processSkipChar(c, isTerminal);
|
return processSkipChar(c, isTerminal, false);
|
||||||
} else {
|
} else {
|
||||||
|
if (DEBUG_CORRECTION) {
|
||||||
|
DUMP_WORD(mWord, mOutputIndex);
|
||||||
|
LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
|
||||||
|
mTransposedCount, mExcessiveCount, c);
|
||||||
|
}
|
||||||
return UNRELATED;
|
return UNRELATED;
|
||||||
}
|
}
|
||||||
} else if (secondTransposing
|
} else if (secondTransposing
|
||||||
|
@ -314,8 +373,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
|
|
||||||
mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0
|
mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0
|
||||||
&& mProximityCount == 0 && mTransposedCount == 0
|
&& mProximityCount == 0 && mTransposedCount == 0
|
||||||
// TODO: remove this line once excessive correction is combined with the others.
|
&& (mInputIndex == mInputLength - 2);
|
||||||
&& mExcessivePos >= 0 && (mInputIndex == mInputLength - 2);
|
|
||||||
const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
|
const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
|
||||||
if (mLastCharExceeded) {
|
if (mLastCharExceeded) {
|
||||||
++mExcessiveCount;
|
++mExcessiveCount;
|
||||||
|
@ -326,6 +384,9 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
startToTraverseAllNodes();
|
startToTraverseAllNodes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
|
||||||
|
mExceeding && mInputIndex == mInputLength - 2;
|
||||||
|
|
||||||
// Finally, we are ready to go to the next character, the next "virtual node".
|
// Finally, we are ready to go to the next character, the next "virtual node".
|
||||||
// We should advance the input index.
|
// We should advance the input index.
|
||||||
// We do this in this branch of the 'if traverseAllNodes' because we are still matching
|
// We do this in this branch of the 'if traverseAllNodes' because we are still matching
|
||||||
|
@ -335,7 +396,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
// Also, the next char is one "virtual node" depth more than this char.
|
// Also, the next char is one "virtual node" depth more than this char.
|
||||||
incrementOutputIndex();
|
incrementOutputIndex();
|
||||||
|
|
||||||
if (isSameAsUserTypedLength && isTerminal) {
|
if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar
|
||||||
|
|| isSameAsUserTypedLength) && isTerminal) {
|
||||||
mTerminalInputIndex = mInputIndex - 1;
|
mTerminalInputIndex = mInputIndex - 1;
|
||||||
mTerminalOutputIndex = mOutputIndex - 1;
|
mTerminalOutputIndex = mOutputIndex - 1;
|
||||||
return ON_TERMINAL;
|
return ON_TERMINAL;
|
||||||
|
@ -453,35 +515,25 @@ inline static int editDistance(
|
||||||
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
|
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
|
||||||
const int freq, int* editDistanceTable, const Correction* correction) {
|
const int freq, int* editDistanceTable, const Correction* correction) {
|
||||||
const int excessivePos = correction->getExcessivePos();
|
const int excessivePos = correction->getExcessivePos();
|
||||||
const int transposedPos = correction->getTransposedPos();
|
|
||||||
const int inputLength = correction->mInputLength;
|
const int inputLength = correction->mInputLength;
|
||||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
||||||
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
||||||
const int skippedCount = correction->mSkippedCount;
|
const int skippedCount = correction->mSkippedCount;
|
||||||
const int transposedCount = correction->mTransposedCount;
|
const int transposedCount = correction->mTransposedCount / 2;
|
||||||
const int excessiveCount = correction->mExcessiveCount;
|
const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
|
||||||
const int proximityMatchedCount = correction->mProximityCount;
|
const int proximityMatchedCount = correction->mProximityCount;
|
||||||
const bool lastCharExceeded = correction->mLastCharExceeded;
|
const bool lastCharExceeded = correction->mLastCharExceeded;
|
||||||
if (skippedCount >= inputLength || inputLength == 0) {
|
if (skippedCount >= inputLength || inputLength == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: find more robust way
|
||||||
if (transposedPos >= 0 && transposedCount == 0) {
|
bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: remove
|
|
||||||
if (excessivePos >= 0 && excessiveCount == 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
|
|
||||||
: (inputLength == inputIndex + 1);
|
: (inputLength == inputIndex + 1);
|
||||||
|
|
||||||
// TODO: use mExcessiveCount
|
// TODO: use mExcessiveCount
|
||||||
int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
|
const int matchCount = inputLength - correction->mProximityCount - excessiveCount;
|
||||||
|
|
||||||
const unsigned short* word = correction->mWord;
|
const unsigned short* word = correction->mWord;
|
||||||
const bool skipped = skippedCount > 0;
|
const bool skipped = skippedCount > 0;
|
||||||
|
@ -490,29 +542,51 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
|
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
|
||||||
|
|
||||||
// TODO: Calculate edit distance for transposed and excessive
|
// TODO: Calculate edit distance for transposed and excessive
|
||||||
int matchWeight;
|
|
||||||
int ed = 0;
|
int ed = 0;
|
||||||
int adJustedProximityMatchedCount = proximityMatchedCount;
|
int adjustedProximityMatchedCount = proximityMatchedCount;
|
||||||
|
|
||||||
|
int finalFreq = freq;
|
||||||
|
|
||||||
// TODO: Optimize this.
|
// TODO: Optimize this.
|
||||||
if (excessivePos < 0 && transposedPos < 0 && (proximityMatchedCount > 0 || skipped)) {
|
// TODO: Ignoring edit distance for transposed char, for now
|
||||||
|
if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
|
||||||
const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord();
|
const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord();
|
||||||
ed = editDistance(editDistanceTable, primaryInputWord,
|
ed = editDistance(editDistanceTable, primaryInputWord,
|
||||||
inputLength, word, outputIndex + 1);
|
inputLength, word, outputIndex + 1);
|
||||||
matchWeight = powerIntCapped(typedLetterMultiplier, outputIndex + 1 - ed);
|
const int matchWeight = powerIntCapped(typedLetterMultiplier,
|
||||||
if (ed == 1 && inputLength == outputIndex) {
|
max(inputLength, outputIndex + 1) - ed);
|
||||||
// Promote a word with just one skipped char
|
multiplyIntCapped(matchWeight, &finalFreq);
|
||||||
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &matchWeight);
|
|
||||||
}
|
// TODO: Demote further if there are two or more excessive chars with longer user input?
|
||||||
ed = max(0, ed - quoteDiffCount);
|
if (inputLength > outputIndex + 1) {
|
||||||
adJustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
|
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
|
||||||
proximityMatchedCount);
|
|
||||||
} else {
|
|
||||||
matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Demote by edit distance
|
ed = max(0, ed - quoteDiffCount);
|
||||||
int finalFreq = freq * matchWeight;
|
|
||||||
|
if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
|
||||||
|
// Promote a word with just one skipped or excessive char
|
||||||
|
if (sameLength) {
|
||||||
|
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
|
||||||
|
} else {
|
||||||
|
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
|
||||||
|
}
|
||||||
|
} else if (ed == 0) {
|
||||||
|
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
|
||||||
|
sameLength = true;
|
||||||
|
}
|
||||||
|
adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
|
||||||
|
proximityMatchedCount);
|
||||||
|
} else {
|
||||||
|
// TODO: Calculate the edit distance for transposed char
|
||||||
|
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
||||||
|
multiplyIntCapped(matchWeight, &finalFreq);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (proximityInfo->getMatchedProximityId(0, word[0], true)
|
||||||
|
== ProximityInfo::UNRELATED_CHAR) {
|
||||||
|
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Promotion and Demotion for each correction
|
// Promotion and Demotion for each correction
|
||||||
|
@ -530,13 +604,16 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
}
|
}
|
||||||
|
|
||||||
// Demotion for a word with transposed character
|
// Demotion for a word with transposed character
|
||||||
if (transposedPos >= 0) multiplyRate(
|
if (transposedCount > 0) multiplyRate(
|
||||||
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
|
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
|
||||||
|
|
||||||
// Demotion for a word with excessive character
|
// Demotion for a word with excessive character
|
||||||
if (excessivePos >= 0) {
|
if (excessiveCount > 0) {
|
||||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||||
if (!proximityInfo->existsAdjacentProximityChars(inputIndex)) {
|
if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
|
||||||
|
if (DEBUG_CORRECTION_FREQ) {
|
||||||
|
LOGI("Double excessive demotion");
|
||||||
|
}
|
||||||
// If an excessive character is not adjacent to the left char or the right char,
|
// If an excessive character is not adjacent to the left char or the right char,
|
||||||
// we will demote this word.
|
// we will demote this word.
|
||||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
||||||
|
@ -544,7 +621,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
}
|
}
|
||||||
|
|
||||||
// Promotion for a word with proximity characters
|
// Promotion for a word with proximity characters
|
||||||
for (int i = 0; i < adJustedProximityMatchedCount; ++i) {
|
for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
|
||||||
// A word with proximity corrections
|
// A word with proximity corrections
|
||||||
if (DEBUG_DICT_FULL) {
|
if (DEBUG_DICT_FULL) {
|
||||||
LOGI("Found a proximity correction.");
|
LOGI("Found a proximity correction.");
|
||||||
|
@ -553,20 +630,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
const int errorCount = proximityMatchedCount + skippedCount;
|
const int errorCount = adjustedProximityMatchedCount > 0
|
||||||
|
? adjustedProximityMatchedCount
|
||||||
|
: (proximityMatchedCount + transposedCount);
|
||||||
multiplyRate(
|
multiplyRate(
|
||||||
100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
|
100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
|
||||||
|
|
||||||
// Promotion for an exactly matched word
|
// Promotion for an exactly matched word
|
||||||
if (matchCount == outputIndex + 1) {
|
if (ed == 0) {
|
||||||
// Full exact match
|
// Full exact match
|
||||||
if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
|
if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
|
||||||
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Promote a word with no correction
|
// Promote a word with no correction
|
||||||
if (proximityMatchedCount == 0 && transposedPos < 0 && !skipped && excessivePos < 0) {
|
if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) {
|
||||||
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -590,6 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Do not use sameLength?
|
||||||
if (sameLength) {
|
if (sameLength) {
|
||||||
multiplyIntCapped(fullWordMultiplier, &finalFreq);
|
multiplyIntCapped(fullWordMultiplier, &finalFreq);
|
||||||
}
|
}
|
||||||
|
@ -598,6 +678,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
LOGI("calc: %d, %d", outputIndex, sameLength);
|
LOGI("calc: %d, %d", outputIndex, sameLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (DEBUG_CORRECTION_FREQ) {
|
||||||
|
DUMP_WORD(correction->mWord, outputIndex + 1);
|
||||||
|
LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
|
||||||
|
skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
|
||||||
|
quoteDiffCount, ed, finalFreq);
|
||||||
|
}
|
||||||
|
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -99,7 +99,8 @@ private:
|
||||||
inline bool needsToTraverseAllNodes();
|
inline bool needsToTraverseAllNodes();
|
||||||
inline void startToTraverseAllNodes();
|
inline void startToTraverseAllNodes();
|
||||||
inline bool isQuote(const unsigned short c);
|
inline bool isQuote(const unsigned short c);
|
||||||
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
|
inline CorrectionType processSkipChar(
|
||||||
|
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
inline void incrementProximityCount() {
|
inline void incrementProximityCount() {
|
||||||
|
|
|
@ -95,10 +95,12 @@ static void prof_out(void) {
|
||||||
#define DEBUG_DICT true
|
#define DEBUG_DICT true
|
||||||
#define DEBUG_DICT_FULL false
|
#define DEBUG_DICT_FULL false
|
||||||
#define DEBUG_EDIT_DISTANCE false
|
#define DEBUG_EDIT_DISTANCE false
|
||||||
#define DEBUG_SHOW_FOUND_WORD DEBUG_DICT_FULL
|
#define DEBUG_SHOW_FOUND_WORD false
|
||||||
#define DEBUG_NODE DEBUG_DICT_FULL
|
#define DEBUG_NODE DEBUG_DICT_FULL
|
||||||
#define DEBUG_TRACE DEBUG_DICT_FULL
|
#define DEBUG_TRACE DEBUG_DICT_FULL
|
||||||
#define DEBUG_PROXIMITY_INFO true
|
#define DEBUG_PROXIMITY_INFO true
|
||||||
|
#define DEBUG_CORRECTION false
|
||||||
|
#define DEBUG_CORRECTION_FREQ true
|
||||||
|
|
||||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
|
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
|
||||||
|
|
||||||
|
@ -121,6 +123,8 @@ static void dumpWord(const unsigned short* word, const int length) {
|
||||||
#define DEBUG_NODE false
|
#define DEBUG_NODE false
|
||||||
#define DEBUG_TRACE false
|
#define DEBUG_TRACE false
|
||||||
#define DEBUG_PROXIMITY_INFO false
|
#define DEBUG_PROXIMITY_INFO false
|
||||||
|
#define DEBUG_CORRECTION false
|
||||||
|
#define DEBUG_CORRECTION_FREQ false
|
||||||
|
|
||||||
#define DUMP_WORD(word, length)
|
#define DUMP_WORD(word, length)
|
||||||
|
|
||||||
|
@ -178,7 +182,9 @@ static void dumpWord(const unsigned short* word, const int length) {
|
||||||
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
|
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
|
||||||
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
|
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
|
||||||
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160
|
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160
|
||||||
#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 42
|
#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45
|
||||||
|
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
|
||||||
|
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
|
||||||
|
|
||||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||||
// This is only used for the size of array. Not to be used in c functions.
|
// This is only used for the size of array. Not to be used in c functions.
|
||||||
|
|
|
@ -189,32 +189,19 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
PROF_START(1);
|
PROF_START(1);
|
||||||
// Note: This line is intentionally left blank
|
getSuggestionCandidates();
|
||||||
PROF_END(1);
|
PROF_END(1);
|
||||||
|
|
||||||
PROF_START(2);
|
PROF_START(2);
|
||||||
// Suggestion with missing character
|
// Note: This line is intentionally left blank
|
||||||
if (DEBUG_DICT) {
|
|
||||||
LOGI("--- Suggest missing characters");
|
|
||||||
}
|
|
||||||
getSuggestionCandidates(0, -1, -1);
|
|
||||||
PROF_END(2);
|
PROF_END(2);
|
||||||
|
|
||||||
PROF_START(3);
|
PROF_START(3);
|
||||||
// Suggestion with excessive character
|
// Note: This line is intentionally left blank
|
||||||
if (DEBUG_DICT) {
|
|
||||||
LOGI("--- Suggest excessive characters");
|
|
||||||
}
|
|
||||||
getSuggestionCandidates(-1, 0, -1);
|
|
||||||
PROF_END(3);
|
PROF_END(3);
|
||||||
|
|
||||||
PROF_START(4);
|
PROF_START(4);
|
||||||
// Suggestion with transposed characters
|
// Note: This line is intentionally left blank
|
||||||
// Only suggest words whose length is mInputLength
|
|
||||||
if (DEBUG_DICT) {
|
|
||||||
LOGI("--- Suggest transposed characters");
|
|
||||||
}
|
|
||||||
getSuggestionCandidates(-1, -1, 0);
|
|
||||||
PROF_END(4);
|
PROF_END(4);
|
||||||
|
|
||||||
PROF_START(5);
|
PROF_START(5);
|
||||||
|
@ -328,14 +315,9 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
static const char SPACE = ' ';
|
static const char SPACE = ' ';
|
||||||
|
|
||||||
void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
void UnigramDictionary::getSuggestionCandidates() {
|
||||||
const int excessivePos, const int transposedPos) {
|
// TODO: Remove setCorrectionParams
|
||||||
if (DEBUG_DICT) {
|
mCorrection->setCorrectionParams(0, 0, 0,
|
||||||
assert(transposedPos + 1 < mInputLength);
|
|
||||||
assert(excessivePos < mInputLength);
|
|
||||||
assert(missingPos < mInputLength);
|
|
||||||
}
|
|
||||||
mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos,
|
|
||||||
-1 /* spaceProximityPos */, -1 /* missingSpacePos */);
|
-1 /* spaceProximityPos */, -1 /* missingSpacePos */);
|
||||||
int rootPosition = ROOT_POS;
|
int rootPosition = ROOT_POS;
|
||||||
// Get the number of children of root, then increment the position
|
// Get the number of children of root, then increment the position
|
||||||
|
@ -727,6 +709,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
pos = BinaryFormat::skipFrequency(flags, pos);
|
pos = BinaryFormat::skipFrequency(flags, pos);
|
||||||
*nextSiblingPosition =
|
*nextSiblingPosition =
|
||||||
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
|
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
|
||||||
|
if (DEBUG_DICT_FULL) {
|
||||||
|
LOGI("Traversing was pruned.");
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,8 +87,7 @@ private:
|
||||||
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize,
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
unsigned short *outWords, int *frequencies);
|
unsigned short *outWords, int *frequencies);
|
||||||
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
void getSuggestionCandidates();
|
||||||
const int transposedPos);
|
|
||||||
bool addWord(unsigned short *word, int length, int frequency);
|
bool addWord(unsigned short *word, int length, int frequency);
|
||||||
void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
|
void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
|
||||||
void getMissingSpaceWords(
|
void getMissingSpaceWords(
|
||||||
|
|
Loading…
Reference in New Issue