Merge "Refactor the correction algorithm related to missing character correction"
commit
33f3b60cf0
|
@ -30,10 +30,9 @@ namespace latinime {
|
||||||
//////////////////////
|
//////////////////////
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
|
|
||||||
inline bool CorrectionState::needsToSkipCurrentNode(const unsigned short c) {
|
inline bool CorrectionState::isQuote(const unsigned short c) {
|
||||||
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
|
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
|
||||||
// Skip the ' or other letter and continue deeper
|
return (c == QUOTE && userTypedChar != QUOTE);
|
||||||
return (c == QUOTE && userTypedChar != QUOTE) || mSkipPos == mOutputIndex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
@ -50,6 +49,7 @@ void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inp
|
||||||
mInputLength = inputLength;
|
mInputLength = inputLength;
|
||||||
mMaxDepth = maxDepth;
|
mMaxDepth = maxDepth;
|
||||||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||||
|
mSkippedOutputIndex = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos,
|
void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos,
|
||||||
|
@ -77,9 +77,8 @@ int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int seco
|
||||||
}
|
}
|
||||||
|
|
||||||
int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
||||||
const int outputIndex = mOutputIndex - 1;
|
const int outputIndex = mTerminalOutputIndex;
|
||||||
const int inputIndex = (mCurrentStateType == TRAVERSE_ALL_ON_TERMINAL
|
const int inputIndex = mTerminalInputIndex;
|
||||||
|| mCurrentStateType == TRAVERSE_ALL_NOT_ON_TERMINAL) ? mInputIndex : mInputIndex - 1;
|
|
||||||
*wordLength = outputIndex + 1;
|
*wordLength = outputIndex + 1;
|
||||||
if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
|
if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -145,22 +144,36 @@ bool CorrectionState::needsToPrune() const {
|
||||||
|| mDiffs > mMaxEditDistance);
|
|| mDiffs > mMaxEditDistance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
|
||||||
|
const int32_t c, const bool isTerminal) {
|
||||||
|
mWord[mOutputIndex] = c;
|
||||||
|
if (needsToTraverseAll() && isTerminal) {
|
||||||
|
mTerminalInputIndex = mInputIndex;
|
||||||
|
mTerminalOutputIndex = mOutputIndex;
|
||||||
|
incrementOutputIndex();
|
||||||
|
return TRAVERSE_ALL_ON_TERMINAL;
|
||||||
|
} else {
|
||||||
|
incrementOutputIndex();
|
||||||
|
return TRAVERSE_ALL_NOT_ON_TERMINAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
||||||
const int32_t c, const bool isTerminal) {
|
const int32_t c, const bool isTerminal) {
|
||||||
mCurrentStateType = NOT_ON_TERMINAL;
|
CorrectionStateType currentStateType = NOT_ON_TERMINAL;
|
||||||
// This has to be done for each virtual char (this forwards the "inputIndex" which
|
// This has to be done for each virtual char (this forwards the "inputIndex" which
|
||||||
// is the index in the user-inputted chars, as read by proximity chars.
|
// is the index in the user-inputted chars, as read by proximity chars.
|
||||||
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
|
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
|
||||||
incrementInputIndex();
|
incrementInputIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mTraverseAllNodes || needsToSkipCurrentNode(c)) {
|
bool skip = false;
|
||||||
mWord[mOutputIndex] = c;
|
if (mSkipPos >= 0) {
|
||||||
if (needsToTraverseAll() && isTerminal) {
|
skip = mSkipPos == mOutputIndex;
|
||||||
mCurrentStateType = TRAVERSE_ALL_ON_TERMINAL;
|
}
|
||||||
} else {
|
|
||||||
mCurrentStateType = TRAVERSE_ALL_NOT_ON_TERMINAL;
|
if (mTraverseAllNodes || isQuote(c)) {
|
||||||
}
|
return processSkipChar(c, isTerminal);
|
||||||
} else {
|
} else {
|
||||||
int inputIndexForProximity = mInputIndex;
|
int inputIndexForProximity = mInputIndex;
|
||||||
|
|
||||||
|
@ -173,12 +186,30 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool checkProximityChars =
|
||||||
|
!(mSkipPos >= 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
|
||||||
int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
||||||
inputIndexForProximity, c, this);
|
inputIndexForProximity, c, checkProximityChars);
|
||||||
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
|
||||||
mCurrentStateType = UNRELATED;
|
const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
|
||||||
return mCurrentStateType;
|
if (unrelated) {
|
||||||
|
if (skip) {
|
||||||
|
// Skip this letter and continue deeper
|
||||||
|
mSkippedOutputIndex = mOutputIndex;
|
||||||
|
return processSkipChar(c, isTerminal);
|
||||||
|
} else {
|
||||||
|
return UNRELATED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// No need to skip. Finish traversing and increment skipPos.
|
||||||
|
// TODO: Remove this?
|
||||||
|
if (skip) {
|
||||||
|
mWord[mOutputIndex] = c;
|
||||||
|
incrementOutputIndex();
|
||||||
|
return TRAVERSE_ALL_NOT_ON_TERMINAL;
|
||||||
|
}
|
||||||
|
|
||||||
mWord[mOutputIndex] = c;
|
mWord[mOutputIndex] = c;
|
||||||
// If inputIndex is greater than mInputLength, that means there is no
|
// If inputIndex is greater than mInputLength, that means there is no
|
||||||
// proximity chars. So, we don't need to check proximity.
|
// proximity chars. So, we don't need to check proximity.
|
||||||
|
@ -195,7 +226,9 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
||||||
|| (mExcessivePos == mInputLength - 1
|
|| (mExcessivePos == mInputLength - 1
|
||||||
&& getInputIndex() == mInputLength - 2);
|
&& getInputIndex() == mInputLength - 2);
|
||||||
if (isSameAsUserTypedLength && isTerminal) {
|
if (isSameAsUserTypedLength && isTerminal) {
|
||||||
mCurrentStateType = ON_TERMINAL;
|
mTerminalInputIndex = mInputIndex;
|
||||||
|
mTerminalOutputIndex = mOutputIndex;
|
||||||
|
currentStateType = ON_TERMINAL;
|
||||||
}
|
}
|
||||||
// Start traversing all nodes after the index exceeds the user typed length
|
// Start traversing all nodes after the index exceeds the user typed length
|
||||||
if (isSameAsUserTypedLength) {
|
if (isSameAsUserTypedLength) {
|
||||||
|
@ -213,7 +246,7 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
||||||
// Also, the next char is one "virtual node" depth more than this char.
|
// Also, the next char is one "virtual node" depth more than this char.
|
||||||
incrementOutputIndex();
|
incrementOutputIndex();
|
||||||
|
|
||||||
return mCurrentStateType;
|
return currentStateType;
|
||||||
}
|
}
|
||||||
|
|
||||||
CorrectionState::~CorrectionState() {
|
CorrectionState::~CorrectionState() {
|
||||||
|
|
|
@ -101,6 +101,7 @@ private:
|
||||||
int mMaxDepth;
|
int mMaxDepth;
|
||||||
int mInputLength;
|
int mInputLength;
|
||||||
int mSkipPos;
|
int mSkipPos;
|
||||||
|
int mSkippedOutputIndex;
|
||||||
int mExcessivePos;
|
int mExcessivePos;
|
||||||
int mTransposedPos;
|
int mTransposedPos;
|
||||||
int mSpaceProximityPos;
|
int mSpaceProximityPos;
|
||||||
|
@ -109,12 +110,14 @@ private:
|
||||||
int mMatchedCharCount;
|
int mMatchedCharCount;
|
||||||
int mInputIndex;
|
int mInputIndex;
|
||||||
int mOutputIndex;
|
int mOutputIndex;
|
||||||
|
int mTerminalInputIndex;
|
||||||
|
int mTerminalOutputIndex;
|
||||||
int mDiffs;
|
int mDiffs;
|
||||||
bool mTraverseAllNodes;
|
bool mTraverseAllNodes;
|
||||||
CorrectionStateType mCurrentStateType;
|
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
|
||||||
inline bool needsToSkipCurrentNode(const unsigned short c);
|
inline bool isQuote(const unsigned short c);
|
||||||
|
inline CorrectionStateType processSkipChar(const int32_t c, const bool isTerminal);
|
||||||
|
|
||||||
class RankingAlgorithm {
|
class RankingAlgorithm {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -114,10 +114,7 @@ bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
|
||||||
// in their list. The non-accented version of the character should be considered
|
// in their list. The non-accented version of the character should be considered
|
||||||
// "close", but not the other keys close to the non-accented version.
|
// "close", but not the other keys close to the non-accented version.
|
||||||
ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
|
ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
|
||||||
const int index, const unsigned short c, CorrectionState *correctionState) const {
|
const int index, const unsigned short c, const bool checkProximityChars) const {
|
||||||
const int skipPos = correctionState->getSkipPos();
|
|
||||||
const int excessivePos = correctionState->getExcessivePos();
|
|
||||||
const int transposedPos = correctionState->getTransposedPos();
|
|
||||||
const int *currentChars = getProximityCharsAt(index);
|
const int *currentChars = getProximityCharsAt(index);
|
||||||
const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
|
const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
|
||||||
|
|
||||||
|
@ -126,9 +123,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
|
||||||
if (currentChars[0] == baseLowerC || currentChars[0] == c)
|
if (currentChars[0] == baseLowerC || currentChars[0] == c)
|
||||||
return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
|
return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
|
||||||
|
|
||||||
// If one of those is true, we should not check for close characters at all.
|
if (!checkProximityChars) return UNRELATED_CHAR;
|
||||||
if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
|
|
||||||
return UNRELATED_CHAR;
|
|
||||||
|
|
||||||
// If the non-accented, lowercased version of that first character matches c,
|
// If the non-accented, lowercased version of that first character matches c,
|
||||||
// then we have a non-accented version of the accented character the user
|
// then we have a non-accented version of the accented character the user
|
||||||
|
|
|
@ -44,7 +44,7 @@ public:
|
||||||
bool existsCharInProximityAt(const int index, const int c) const;
|
bool existsCharInProximityAt(const int index, const int c) const;
|
||||||
bool existsAdjacentProximityChars(const int index) const;
|
bool existsAdjacentProximityChars(const int index) const;
|
||||||
ProximityType getMatchedProximityId(
|
ProximityType getMatchedProximityId(
|
||||||
const int index, const unsigned short c, CorrectionState *correctionState) const;
|
const int index, const unsigned short c, const bool checkProximityChars) const;
|
||||||
bool sameAsTyped(const unsigned short *word, int length) const;
|
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||||
private:
|
private:
|
||||||
int getStartIndexFromCoordinates(const int x, const int y) const;
|
int getStartIndexFromCoordinates(const int x, const int y) const;
|
||||||
|
|
Loading…
Reference in New Issue