Merge "Combine the skipped and transposed correction"

main
satok 2011-08-23 07:42:24 -07:00 committed by Android (Google) Code Review
commit f77009ac3a
5 changed files with 162 additions and 84 deletions

View File

@ -190,15 +190,15 @@ void Correction::startToTraverseAllNodes() {
} }
bool Correction::needsToPrune() const { bool Correction::needsToPrune() const {
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth) return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
|| mProximityCount > mMaxEditDistance);
} }
// TODO: inline?
Correction::CorrectionType Correction::processSkipChar( Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
mWord[mOutputIndex] = c; mWord[mOutputIndex] = c;
if (needsToTraverseAllNodes() && isTerminal) { if (needsToTraverseAllNodes() && isTerminal) {
mTerminalInputIndex = mInputIndex; mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
mTerminalOutputIndex = mOutputIndex; mTerminalOutputIndex = mOutputIndex;
incrementOutputIndex(); incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL; return TRAVERSE_ALL_ON_TERMINAL;
@ -212,13 +212,22 @@ Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) {
if (mNeedsToTraverseAllNodes || isQuote(c)) { if (mNeedsToTraverseAllNodes || isQuote(c)) {
if (mLastCharExceeded > 0 && mInputIndex == mInputLength - 1 bool incremented = false;
&& mProximityInfo->getMatchedProximityId(mInputIndex, c, false) if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { // TODO: Do not check the proximity if EditDistance exceeds the threshold
mLastCharExceeded = false; const int matchId = mProximityInfo->getMatchedProximityId(mInputIndex, c, true);
--mExcessiveCount; if (matchId == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
mLastCharExceeded = false;
--mExcessiveCount;
} else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
mLastCharExceeded = false;
--mExcessiveCount;
++mProximityCount;
}
incrementInputIndex();
incremented = true;
} }
return processSkipChar(c, isTerminal); return processSkipChar(c, isTerminal, incremented);
} }
if (mExcessivePos >= 0) { if (mExcessivePos >= 0) {
@ -258,22 +267,67 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else if (mCorrectionStates[mOutputIndex].mExceeding) { } else if (mCorrectionStates[mOutputIndex].mExceeding) {
--mTransposedCount; --mTransposedCount;
++mExcessiveCount; ++mExcessiveCount;
--mExcessivePos;
incrementInputIndex(); incrementInputIndex();
} else { } else {
--mTransposedCount; --mTransposedCount;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return UNRELATED; return UNRELATED;
} }
} }
const bool noCorrectionsHappenedSoFar =
(mSkippedCount + mExcessiveCount + mTransposedCount) == 0;
// TODO: sum counters // TODO: sum counters
const bool checkProximityChars = const bool checkProximityChars = noCorrectionsHappenedSoFar;
!(mSkippedCount > 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
const int matchedProximityCharId = secondTransposing const int matchedProximityCharId = secondTransposing
? ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR ? ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
: mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars); : mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars);
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) { if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
if (mInputIndex - 1 < mInputLength && (mExceeding || mTransposing) // TODO: Optimize
// As the current char turned out to be an unrelated char,
// we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
// here refers to the previous state.
if (noCorrectionsHappenedSoFar
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding
&& mProximityInfo->getMatchedProximityId(mInputIndex, mWord[mOutputIndex], false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
// TODO: check transpose in the same way?
++mExcessiveCount;
--mProximityCount;
} else if (mInputIndex < mInputLength - 1 && mOutputIndex > 0
&& mTransposedCount > 0 && mExcessiveCount == 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
&& mProximityInfo->getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
&& mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
// Example:
// occaisional -> occa sional
// mmmmttx -> mmmm(E)mmmmmm
mTransposedCount -= 2;
++mExcessiveCount;
++mInputIndex;
} else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0 && mSkippedCount == 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
&& mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
// Example:
// chcolate -> chocolate
// mmttx -> mmsmmmmmm
mTransposedCount -= 2;
++mSkippedCount;
--mInputIndex;
} else if (mInputIndex - 1 < mInputLength && (mExceeding || mTransposing)
&& mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false) && mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
if (mTransposing) { if (mTransposing) {
@ -282,11 +336,11 @@ Correction::CorrectionType Correction::processCharAndCalcState(
++mExcessiveCount; ++mExcessiveCount;
incrementInputIndex(); incrementInputIndex();
} }
} else if (mSkipping && mProximityCount == 0) { } else if (mProximityCount == 0 && noCorrectionsHappenedSoFar) {
// Skip this letter and continue deeper // Skip this letter and continue deeper
++mSkippedCount; ++mSkippedCount;
return processSkipChar(c, isTerminal); return processSkipChar(c, isTerminal, false);
} else if (checkProximityChars } else if (noCorrectionsHappenedSoFar
&& mInputIndex > 0 && mInputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mSkipping
@ -296,8 +350,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// proximity chars of "s", but it should rather be handled as a skipped char. // proximity chars of "s", but it should rather be handled as a skipped char.
++mSkippedCount; ++mSkippedCount;
--mProximityCount; --mProximityCount;
return processSkipChar(c, isTerminal); return processSkipChar(c, isTerminal, false);
} else { } else {
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return UNRELATED; return UNRELATED;
} }
} else if (secondTransposing } else if (secondTransposing
@ -314,8 +373,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0
&& mProximityCount == 0 && mTransposedCount == 0 && mProximityCount == 0 && mTransposedCount == 0
// TODO: remove this line once excessive correction is combined with the others. && (mInputIndex == mInputLength - 2);
&& mExcessivePos >= 0 && (mInputIndex == mInputLength - 2);
const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded; const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
if (mLastCharExceeded) { if (mLastCharExceeded) {
++mExcessiveCount; ++mExcessiveCount;
@ -326,6 +384,9 @@ Correction::CorrectionType Correction::processCharAndCalcState(
startToTraverseAllNodes(); startToTraverseAllNodes();
} }
const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
mExceeding && mInputIndex == mInputLength - 2;
// Finally, we are ready to go to the next character, the next "virtual node". // Finally, we are ready to go to the next character, the next "virtual node".
// We should advance the input index. // We should advance the input index.
// We do this in this branch of the 'if traverseAllNodes' because we are still matching // We do this in this branch of the 'if traverseAllNodes' because we are still matching
@ -335,7 +396,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// Also, the next char is one "virtual node" depth more than this char. // Also, the next char is one "virtual node" depth more than this char.
incrementOutputIndex(); incrementOutputIndex();
if (isSameAsUserTypedLength && isTerminal) { if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar
|| isSameAsUserTypedLength) && isTerminal) {
mTerminalInputIndex = mInputIndex - 1; mTerminalInputIndex = mInputIndex - 1;
mTerminalOutputIndex = mOutputIndex - 1; mTerminalOutputIndex = mOutputIndex - 1;
return ON_TERMINAL; return ON_TERMINAL;
@ -453,35 +515,25 @@ inline static int editDistance(
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex, int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
const int freq, int* editDistanceTable, const Correction* correction) { const int freq, int* editDistanceTable, const Correction* correction) {
const int excessivePos = correction->getExcessivePos(); const int excessivePos = correction->getExcessivePos();
const int transposedPos = correction->getTransposedPos();
const int inputLength = correction->mInputLength; const int inputLength = correction->mInputLength;
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correction->mProximityInfo; const ProximityInfo *proximityInfo = correction->mProximityInfo;
const int skippedCount = correction->mSkippedCount; const int skippedCount = correction->mSkippedCount;
const int transposedCount = correction->mTransposedCount; const int transposedCount = correction->mTransposedCount / 2;
const int excessiveCount = correction->mExcessiveCount; const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
const int proximityMatchedCount = correction->mProximityCount; const int proximityMatchedCount = correction->mProximityCount;
const bool lastCharExceeded = correction->mLastCharExceeded; const bool lastCharExceeded = correction->mLastCharExceeded;
if (skippedCount >= inputLength || inputLength == 0) { if (skippedCount >= inputLength || inputLength == 0) {
return -1; return -1;
} }
// TODO: remove // TODO: find more robust way
if (transposedPos >= 0 && transposedCount == 0) { bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
return -1;
}
// TODO: remove
if (excessivePos >= 0 && excessiveCount == 0) {
return -1;
}
const bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
: (inputLength == inputIndex + 1); : (inputLength == inputIndex + 1);
// TODO: use mExcessiveCount // TODO: use mExcessiveCount
int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0); const int matchCount = inputLength - correction->mProximityCount - excessiveCount;
const unsigned short* word = correction->mWord; const unsigned short* word = correction->mWord;
const bool skipped = skippedCount > 0; const bool skipped = skippedCount > 0;
@ -490,29 +542,51 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
// TODO: Calculate edit distance for transposed and excessive // TODO: Calculate edit distance for transposed and excessive
int matchWeight;
int ed = 0; int ed = 0;
int adJustedProximityMatchedCount = proximityMatchedCount; int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
// TODO: Optimize this. // TODO: Optimize this.
if (excessivePos < 0 && transposedPos < 0 && (proximityMatchedCount > 0 || skipped)) { // TODO: Ignoring edit distance for transposed char, for now
if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord(); const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord();
ed = editDistance(editDistanceTable, primaryInputWord, ed = editDistance(editDistanceTable, primaryInputWord,
inputLength, word, outputIndex + 1); inputLength, word, outputIndex + 1);
matchWeight = powerIntCapped(typedLetterMultiplier, outputIndex + 1 - ed); const int matchWeight = powerIntCapped(typedLetterMultiplier,
if (ed == 1 && inputLength == outputIndex) { max(inputLength, outputIndex + 1) - ed);
// Promote a word with just one skipped char multiplyIntCapped(matchWeight, &finalFreq);
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &matchWeight);
// TODO: Demote further if there are two or more excessive chars with longer user input?
if (inputLength > outputIndex + 1) {
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
} }
ed = max(0, ed - quoteDiffCount); ed = max(0, ed - quoteDiffCount);
adJustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
// Promote a word with just one skipped or excessive char
if (sameLength) {
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
} else {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
}
} else if (ed == 0) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
sameLength = true;
}
adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
proximityMatchedCount); proximityMatchedCount);
} else { } else {
matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); // TODO: Calculate the edit distance for transposed char
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq);
} }
// TODO: Demote by edit distance if (proximityInfo->getMatchedProximityId(0, word[0], true)
int finalFreq = freq * matchWeight; == ProximityInfo::UNRELATED_CHAR) {
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
}
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Promotion and Demotion for each correction // Promotion and Demotion for each correction
@ -530,13 +604,16 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
} }
// Demotion for a word with transposed character // Demotion for a word with transposed character
if (transposedPos >= 0) multiplyRate( if (transposedCount > 0) multiplyRate(
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
// Demotion for a word with excessive character // Demotion for a word with excessive character
if (excessivePos >= 0) { if (excessiveCount > 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!proximityInfo->existsAdjacentProximityChars(inputIndex)) { if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_CORRECTION_FREQ) {
LOGI("Double excessive demotion");
}
// If an excessive character is not adjacent to the left char or the right char, // If an excessive character is not adjacent to the left char or the right char,
// we will demote this word. // we will demote this word.
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
@ -544,7 +621,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
} }
// Promotion for a word with proximity characters // Promotion for a word with proximity characters
for (int i = 0; i < adJustedProximityMatchedCount; ++i) { for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
// A word with proximity corrections // A word with proximity corrections
if (DEBUG_DICT_FULL) { if (DEBUG_DICT_FULL) {
LOGI("Found a proximity correction."); LOGI("Found a proximity correction.");
@ -553,20 +630,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
} }
const int errorCount = proximityMatchedCount + skippedCount; const int errorCount = adjustedProximityMatchedCount > 0
? adjustedProximityMatchedCount
: (proximityMatchedCount + transposedCount);
multiplyRate( multiplyRate(
100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq); 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
// Promotion for an exactly matched word // Promotion for an exactly matched word
if (matchCount == outputIndex + 1) { if (ed == 0) {
// Full exact match // Full exact match
if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) { if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
} }
} }
// Promote a word with no correction // Promote a word with no correction
if (proximityMatchedCount == 0 && transposedPos < 0 && !skipped && excessivePos < 0) { if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) {
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
} }
@ -590,6 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
} }
// TODO: Do not use sameLength?
if (sameLength) { if (sameLength) {
multiplyIntCapped(fullWordMultiplier, &finalFreq); multiplyIntCapped(fullWordMultiplier, &finalFreq);
} }
@ -598,6 +678,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
LOGI("calc: %d, %d", outputIndex, sameLength); LOGI("calc: %d, %d", outputIndex, sameLength);
} }
if (DEBUG_CORRECTION_FREQ) {
DUMP_WORD(correction->mWord, outputIndex + 1);
LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
quoteDiffCount, ed, finalFreq);
}
return finalFreq; return finalFreq;
} }

View File

@ -99,7 +99,8 @@ private:
inline bool needsToTraverseAllNodes(); inline bool needsToTraverseAllNodes();
inline void startToTraverseAllNodes(); inline void startToTraverseAllNodes();
inline bool isQuote(const unsigned short c); inline bool isQuote(const unsigned short c);
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal); inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
// TODO: remove // TODO: remove
inline void incrementProximityCount() { inline void incrementProximityCount() {

View File

@ -95,10 +95,12 @@ static void prof_out(void) {
#define DEBUG_DICT true #define DEBUG_DICT true
#define DEBUG_DICT_FULL false #define DEBUG_DICT_FULL false
#define DEBUG_EDIT_DISTANCE false #define DEBUG_EDIT_DISTANCE false
#define DEBUG_SHOW_FOUND_WORD DEBUG_DICT_FULL #define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL #define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL #define DEBUG_TRACE DEBUG_DICT_FULL
#define DEBUG_PROXIMITY_INFO true #define DEBUG_PROXIMITY_INFO true
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ true
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
@ -121,6 +123,8 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_NODE false #define DEBUG_NODE false
#define DEBUG_TRACE false #define DEBUG_TRACE false
#define DEBUG_PROXIMITY_INFO false #define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
#define DUMP_WORD(word, length) #define DUMP_WORD(word, length)
@ -178,7 +182,9 @@ static void dumpWord(const unsigned short* word, const int length) {
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90 #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105 #define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160 #define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160
#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 42 #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions. // This is only used for the size of array. Not to be used in c functions.

View File

@ -189,32 +189,19 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
// TODO: remove // TODO: remove
PROF_START(1); PROF_START(1);
// Note: This line is intentionally left blank getSuggestionCandidates();
PROF_END(1); PROF_END(1);
PROF_START(2); PROF_START(2);
// Suggestion with missing character // Note: This line is intentionally left blank
if (DEBUG_DICT) {
LOGI("--- Suggest missing characters");
}
getSuggestionCandidates(0, -1, -1);
PROF_END(2); PROF_END(2);
PROF_START(3); PROF_START(3);
// Suggestion with excessive character // Note: This line is intentionally left blank
if (DEBUG_DICT) {
LOGI("--- Suggest excessive characters");
}
getSuggestionCandidates(-1, 0, -1);
PROF_END(3); PROF_END(3);
PROF_START(4); PROF_START(4);
// Suggestion with transposed characters // Note: This line is intentionally left blank
// Only suggest words whose length is mInputLength
if (DEBUG_DICT) {
LOGI("--- Suggest transposed characters");
}
getSuggestionCandidates(-1, -1, 0);
PROF_END(4); PROF_END(4);
PROF_START(5); PROF_START(5);
@ -328,14 +315,9 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
static const char QUOTE = '\''; static const char QUOTE = '\'';
static const char SPACE = ' '; static const char SPACE = ' ';
void UnigramDictionary::getSuggestionCandidates(const int skipPos, void UnigramDictionary::getSuggestionCandidates() {
const int excessivePos, const int transposedPos) { // TODO: Remove setCorrectionParams
if (DEBUG_DICT) { mCorrection->setCorrectionParams(0, 0, 0,
assert(transposedPos + 1 < mInputLength);
assert(excessivePos < mInputLength);
assert(missingPos < mInputLength);
}
mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */); -1 /* spaceProximityPos */, -1 /* missingSpacePos */);
int rootPosition = ROOT_POS; int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position // Get the number of children of root, then increment the position
@ -727,6 +709,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
pos = BinaryFormat::skipFrequency(flags, pos); pos = BinaryFormat::skipFrequency(flags, pos);
*nextSiblingPosition = *nextSiblingPosition =
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
if (DEBUG_DICT_FULL) {
LOGI("Traversing was pruned.");
}
return false; return false;
} }
} }

View File

@ -87,8 +87,7 @@ private:
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies); unsigned short *outWords, int *frequencies);
void getSuggestionCandidates(const int skipPos, const int excessivePos, void getSuggestionCandidates();
const int transposedPos);
bool addWord(unsigned short *word, int length, int frequency); bool addWord(unsigned short *word, int length, int frequency);
void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
void getMissingSpaceWords( void getMissingSpaceWords(