Do the transposed correction and the excessive correction by one loop
Change-Id: Idc7a3451a65f7b980e5c499e9083f67646b3a199main
parent
961453c3b3
commit
9db2097f7b
|
@ -56,17 +56,22 @@ void Correction::initCorrectionState(
|
|||
const int rootPos, const int childCount, const bool traverseAll) {
|
||||
latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
|
||||
// TODO: remove
|
||||
mCorrectionStates[0].mTransposedPos = mTransposedPos;
|
||||
mCorrectionStates[0].mExcessivePos = mExcessivePos;
|
||||
mCorrectionStates[0].mSkipPos = mSkipPos;
|
||||
}
|
||||
|
||||
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
|
||||
const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
|
||||
// TODO: remove
|
||||
mTransposedPos = transposedPos;
|
||||
mExcessivePos = excessivePos;
|
||||
mSkipPos = skipPos;
|
||||
// TODO: remove
|
||||
mCorrectionStates[0].mTransposedPos = transposedPos;
|
||||
mCorrectionStates[0].mExcessivePos = excessivePos;
|
||||
mCorrectionStates[0].mSkipPos = skipPos;
|
||||
mExcessivePos = excessivePos;
|
||||
mTransposedPos = transposedPos;
|
||||
|
||||
mSpaceProximityPos = spaceProximityPos;
|
||||
mMissingSpacePos = missingSpacePos;
|
||||
}
|
||||
|
@ -107,12 +112,23 @@ bool Correction::initProcessState(const int outputIndex) {
|
|||
--(mCorrectionStates[outputIndex].mChildCount);
|
||||
mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
|
||||
mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
|
||||
|
||||
mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
|
||||
mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
|
||||
mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount;
|
||||
mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
|
||||
mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded;
|
||||
|
||||
mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos;
|
||||
mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos;
|
||||
mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
|
||||
mSkipping = false;
|
||||
mProximityMatching = false;
|
||||
|
||||
mMatching = false;
|
||||
mProximityMatching = false;
|
||||
mTransposing = false;
|
||||
mExceeding = false;
|
||||
mSkipping = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -150,12 +166,23 @@ void Correction::incrementOutputIndex() {
|
|||
mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
|
||||
mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
|
||||
mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
|
||||
|
||||
mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
|
||||
mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
|
||||
mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
|
||||
mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
|
||||
mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
|
||||
|
||||
mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
|
||||
mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
|
||||
mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
|
||||
|
||||
mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
|
||||
|
||||
mCorrectionStates[mOutputIndex].mMatching = mMatching;
|
||||
mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
|
||||
mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
|
||||
mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
|
||||
mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
|
||||
}
|
||||
|
||||
void Correction::startToTraverseAllNodes() {
|
||||
|
@ -184,13 +211,16 @@ Correction::CorrectionType Correction::processSkipChar(
|
|||
Correction::CorrectionType Correction::processCharAndCalcState(
|
||||
const int32_t c, const bool isTerminal) {
|
||||
CorrectionType currentStateType = NOT_ON_TERMINAL;
|
||||
// This has to be done for each virtual char (this forwards the "inputIndex" which
|
||||
// is the index in the user-inputted chars, as read by proximity chars.
|
||||
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
|
||||
incrementInputIndex();
|
||||
|
||||
if (mExcessivePos >= 0) {
|
||||
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
|
||||
++mExcessivePos;
|
||||
}
|
||||
if (mExcessivePos < mInputLength - 1) {
|
||||
mExceeding = mExcessivePos == mInputIndex;
|
||||
}
|
||||
}
|
||||
|
||||
bool skip = false;
|
||||
if (mSkipPos >= 0) {
|
||||
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
|
||||
if (DEBUG_DICT) {
|
||||
|
@ -198,41 +228,63 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
}
|
||||
++mSkipPos;
|
||||
}
|
||||
skip = mSkipPos == mOutputIndex;
|
||||
mSkipping = true;
|
||||
mSkipping = mSkipPos == mOutputIndex;
|
||||
}
|
||||
|
||||
if (mTransposedPos >= 0) {
|
||||
if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
|
||||
++mTransposedPos;
|
||||
}
|
||||
if (mTransposedPos < mInputLength - 1) {
|
||||
mTransposing = mInputIndex == mTransposedPos;
|
||||
}
|
||||
}
|
||||
|
||||
if (mNeedsToTraverseAllNodes || isQuote(c)) {
|
||||
return processSkipChar(c, isTerminal);
|
||||
} else {
|
||||
int inputIndexForProximity = mInputIndex;
|
||||
|
||||
if (mTransposedPos >= 0) {
|
||||
if (mInputIndex == mTransposedPos) {
|
||||
++inputIndexForProximity;
|
||||
}
|
||||
if (mInputIndex == (mTransposedPos + 1)) {
|
||||
--inputIndexForProximity;
|
||||
bool secondTransposing = false;
|
||||
if (mTransposedCount % 2 == 1) {
|
||||
if (mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
|
||||
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||
++mTransposedCount;
|
||||
secondTransposing = true;
|
||||
} else if (mCorrectionStates[mOutputIndex].mExceeding) {
|
||||
--mTransposedCount;
|
||||
++mExcessiveCount;
|
||||
incrementInputIndex();
|
||||
} else {
|
||||
--mTransposedCount;
|
||||
return UNRELATED;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: sum counters
|
||||
const bool checkProximityChars =
|
||||
!(mSkippedCount > 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
|
||||
int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
||||
inputIndexForProximity, c, checkProximityChars);
|
||||
// TODO: do not check if second transposing
|
||||
const int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
||||
mInputIndex, c, checkProximityChars);
|
||||
|
||||
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
||||
if (skip && mProximityCount == 0) {
|
||||
if (!secondTransposing && ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
||||
if (mInputIndex - 1 < mInputLength && (mExceeding || mTransposing)
|
||||
&& mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
|
||||
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||
if (mTransposing) {
|
||||
++mTransposedCount;
|
||||
} else {
|
||||
++mExcessiveCount;
|
||||
incrementInputIndex();
|
||||
}
|
||||
} else if (mSkipping && mProximityCount == 0) {
|
||||
// Skip this letter and continue deeper
|
||||
++mSkippedCount;
|
||||
return processSkipChar(c, isTerminal);
|
||||
} else if (checkProximityChars
|
||||
&& inputIndexForProximity > 0
|
||||
&& mInputIndex > 0
|
||||
&& mCorrectionStates[mOutputIndex].mProximityMatching
|
||||
&& mCorrectionStates[mOutputIndex].mSkipping
|
||||
&& mProximityInfo->getMatchedProximityId(
|
||||
inputIndexForProximity - 1, c, false)
|
||||
&& mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
|
||||
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
|
||||
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
|
||||
// proximity chars of "s", but it should rather be handled as a skipped char.
|
||||
|
@ -242,7 +294,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
} else {
|
||||
return UNRELATED;
|
||||
}
|
||||
} else if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||
} else if (secondTransposing
|
||||
|| ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||
// If inputIndex is greater than mInputLength, that means there is no
|
||||
// proximity chars. So, we don't need to check proximity.
|
||||
mMatching = true;
|
||||
|
@ -253,10 +306,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
|
||||
mWord[mOutputIndex] = c;
|
||||
|
||||
const bool isSameAsUserTypedLength = mInputLength
|
||||
== getInputIndex() + 1
|
||||
|| (mExcessivePos == mInputLength - 1
|
||||
&& getInputIndex() == mInputLength - 2);
|
||||
mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0
|
||||
&& mProximityCount == 0 && mTransposedCount == 0
|
||||
// TODO: remove this line once excessive correction is conmibned to others.
|
||||
&& mExcessivePos >= 0 && (mInputIndex == mInputLength - 2);
|
||||
const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
|
||||
if (mLastCharExceeded) {
|
||||
// TODO: Decrement mExcessiveCount if next char is matched word.
|
||||
++mExcessiveCount;
|
||||
}
|
||||
if (isSameAsUserTypedLength && isTerminal) {
|
||||
mTerminalInputIndex = mInputIndex;
|
||||
mTerminalOutputIndex = mOutputIndex;
|
||||
|
@ -395,20 +453,33 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
|||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
||||
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
||||
const int skipCount = correction->mSkippedCount;
|
||||
const int skippedCount = correction->mSkippedCount;
|
||||
const int transposedCount = correction->mTransposedCount;
|
||||
const int excessiveCount = correction->mExcessiveCount;
|
||||
const int proximityMatchedCount = correction->mProximityCount;
|
||||
if (skipCount >= inputLength || inputLength == 0) {
|
||||
const bool lastCharExceeded = correction->mLastCharExceeded;
|
||||
if (skippedCount >= inputLength || inputLength == 0) {
|
||||
return -1;
|
||||
}
|
||||
const bool sameLength = (excessivePos == inputLength - 1) ? (inputLength == inputIndex + 2)
|
||||
: (inputLength == inputIndex + 1);
|
||||
|
||||
// TODO: remove
|
||||
if (transposedPos >= 0 && transposedCount == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
if (excessivePos >= 0 && excessiveCount == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
|
||||
: (inputLength == inputIndex + 1);
|
||||
|
||||
// TODO: use mExcessiveCount
|
||||
int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
|
||||
|
||||
const unsigned short* word = correction->mWord;
|
||||
const bool skipped = skipCount > 0;
|
||||
const bool skipped = skippedCount > 0;
|
||||
|
||||
const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
|
||||
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
|
||||
|
@ -417,6 +488,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
|||
int matchWeight;
|
||||
int ed = 0;
|
||||
int adJustedProximityMatchedCount = proximityMatchedCount;
|
||||
|
||||
// TODO: Optimize this.
|
||||
if (excessivePos < 0 && transposedPos < 0 && (proximityMatchedCount > 0 || skipped)) {
|
||||
const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord();
|
||||
ed = editDistance(editDistanceTable, primaryInputWord,
|
||||
|
@ -475,7 +548,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
|||
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||
}
|
||||
|
||||
const int errorCount = proximityMatchedCount + skipCount;
|
||||
const int errorCount = proximityMatchedCount + skippedCount;
|
||||
multiplyRate(
|
||||
100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
|
||||
|
||||
|
|
|
@ -113,8 +113,6 @@ private:
|
|||
int mMaxEditDistance;
|
||||
int mMaxDepth;
|
||||
int mInputLength;
|
||||
int mExcessivePos;
|
||||
int mTransposedPos;
|
||||
int mSpaceProximityPos;
|
||||
int mMissingSpacePos;
|
||||
int mTerminalInputIndex;
|
||||
|
@ -126,15 +124,26 @@ private:
|
|||
CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
|
||||
|
||||
// The following member variables are being used as cache values of the correction state.
|
||||
bool mNeedsToTraverseAllNodes;
|
||||
int mOutputIndex;
|
||||
int mInputIndex;
|
||||
|
||||
int mProximityCount;
|
||||
int mExcessiveCount;
|
||||
int mTransposedCount;
|
||||
int mSkippedCount;
|
||||
|
||||
int mTransposedPos;
|
||||
int mExcessivePos;
|
||||
int mSkipPos;
|
||||
bool mNeedsToTraverseAllNodes;
|
||||
|
||||
bool mLastCharExceeded;
|
||||
|
||||
bool mMatching;
|
||||
bool mSkipping;
|
||||
bool mProximityMatching;
|
||||
bool mExceeding;
|
||||
bool mTransposing;
|
||||
bool mSkipping;
|
||||
|
||||
class RankingAlgorithm {
|
||||
public:
|
||||
|
|
|
@ -28,12 +28,25 @@ struct CorrectionState {
|
|||
int mSiblingPos;
|
||||
uint16_t mChildCount;
|
||||
uint8_t mInputIndex;
|
||||
|
||||
uint8_t mProximityCount;
|
||||
uint8_t mTransposedCount;
|
||||
uint8_t mExcessiveCount;
|
||||
uint8_t mSkippedCount;
|
||||
|
||||
int8_t mTransposedPos;
|
||||
int8_t mExcessivePos;
|
||||
int8_t mSkipPos; // should be signed
|
||||
|
||||
// TODO: int?
|
||||
bool mLastCharExceeded;
|
||||
|
||||
bool mMatching;
|
||||
bool mTransposing;
|
||||
bool mExceeding;
|
||||
bool mSkipping;
|
||||
bool mProximityMatching;
|
||||
|
||||
bool mNeedsToTraverseAllNodes;
|
||||
|
||||
};
|
||||
|
@ -43,14 +56,27 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
|
|||
state->mParentIndex = -1;
|
||||
state->mChildCount = childCount;
|
||||
state->mInputIndex = 0;
|
||||
state->mProximityCount = 0;
|
||||
state->mSiblingPos = rootPos;
|
||||
state->mSkippedCount = 0;
|
||||
state->mMatching = false;
|
||||
state->mSkipping = false;
|
||||
state->mProximityMatching = false;
|
||||
state->mNeedsToTraverseAllNodes = traverseAll;
|
||||
|
||||
state->mTransposedPos = -1;
|
||||
state->mExcessivePos = -1;
|
||||
state->mSkipPos = -1;
|
||||
|
||||
|
||||
state->mProximityCount = 0;
|
||||
state->mTransposedCount = 0;
|
||||
state->mExcessiveCount = 0;
|
||||
state->mSkippedCount = 0;
|
||||
|
||||
state->mLastCharExceeded = false;
|
||||
|
||||
state->mMatching = false;
|
||||
state->mProximityMatching = false;
|
||||
state->mTransposing = false;
|
||||
state->mExceeding = false;
|
||||
state->mSkipping = false;
|
||||
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -194,34 +194,27 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
|||
|
||||
PROF_START(2);
|
||||
// Suggestion with missing character
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("--- Suggest missing characters");
|
||||
}
|
||||
getSuggestionCandidates(0, -1, -1);
|
||||
PROF_END(2);
|
||||
|
||||
PROF_START(3);
|
||||
// Suggestion with excessive character
|
||||
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER
|
||||
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {
|
||||
for (int i = 0; i < codesSize; ++i) {
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("--- Suggest excessive characters %d", i);
|
||||
}
|
||||
getSuggestionCandidates(-1, i, -1);
|
||||
}
|
||||
LOGI("--- Suggest excessive characters");
|
||||
}
|
||||
getSuggestionCandidates(-1, 0, -1);
|
||||
PROF_END(3);
|
||||
|
||||
PROF_START(4);
|
||||
// Suggestion with transposed characters
|
||||
// Only suggest words that length is mInputLength
|
||||
if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
|
||||
for (int i = 0; i < codesSize; ++i) {
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("--- Suggest transposed characters %d", i);
|
||||
}
|
||||
getSuggestionCandidates(-1, -1, i);
|
||||
}
|
||||
LOGI("--- Suggest transposed characters");
|
||||
}
|
||||
getSuggestionCandidates(-1, -1, 0);
|
||||
PROF_END(4);
|
||||
|
||||
PROF_START(5);
|
||||
|
|
Loading…
Reference in New Issue