am d48c2117: am f4425aaa: Refactor ProximityType and CorrectionType
* commit 'd48c2117d0753ee21e9f6a14702dc8ec2fc4c408': Refactor ProximityType and CorrectionType
main
commit
505c418dbf
|
@ -190,11 +190,11 @@ bool Correction::needsToPrune() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static bool isEquivalentChar(ProximityType type) {
|
inline static bool isEquivalentChar(ProximityType type) {
|
||||||
return type == EQUIVALENT_CHAR;
|
return type == MATCH_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static bool isProximityCharOrEquivalentChar(ProximityType type) {
|
inline static bool isProximityCharOrEquivalentChar(ProximityType type) {
|
||||||
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
|
return type == MATCH_CHAR || type == PROXIMITY_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
|
Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
|
||||||
|
@ -221,7 +221,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons
|
||||||
--mExcessiveCount;
|
--mExcessiveCount;
|
||||||
mDistances[mOutputIndex] =
|
mDistances[mOutputIndex] =
|
||||||
mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
|
mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
|
||||||
} else if (matchId == NEAR_PROXIMITY_CHAR) {
|
} else if (matchId == PROXIMITY_CHAR) {
|
||||||
mLastCharExceeded = false;
|
mLastCharExceeded = false;
|
||||||
--mExcessiveCount;
|
--mExcessiveCount;
|
||||||
++mProximityCount;
|
++mProximityCount;
|
||||||
|
@ -299,11 +299,11 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons
|
||||||
: (noCorrectionsHappenedSoFar && mProximityCount == 0);
|
: (noCorrectionsHappenedSoFar && mProximityCount == 0);
|
||||||
|
|
||||||
ProximityType matchedProximityCharId = secondTransposing
|
ProximityType matchedProximityCharId = secondTransposing
|
||||||
? EQUIVALENT_CHAR
|
? MATCH_CHAR
|
||||||
: mProximityInfoState.getProximityType(
|
: mProximityInfoState.getProximityType(
|
||||||
mInputIndex, c, checkProximityChars, &proximityIndex);
|
mInputIndex, c, checkProximityChars, &proximityIndex);
|
||||||
|
|
||||||
if (UNRELATED_CHAR == matchedProximityCharId
|
if (SUBSTITUTION_CHAR == matchedProximityCharId
|
||||||
|| ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
|
|| ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
|
||||||
if (canTryCorrection && mOutputIndex > 0
|
if (canTryCorrection && mOutputIndex > 0
|
||||||
&& mCorrectionStates[mOutputIndex].mProximityMatching
|
&& mCorrectionStates[mOutputIndex].mProximityMatching
|
||||||
|
@ -332,7 +332,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UNRELATED_CHAR == matchedProximityCharId
|
if (SUBSTITUTION_CHAR == matchedProximityCharId
|
||||||
|| ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
|
|| ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
|
||||||
if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
|
if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
|
||||||
mAdditionalProximityMatching = true;
|
mAdditionalProximityMatching = true;
|
||||||
|
@ -455,7 +455,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons
|
||||||
mMatching = true;
|
mMatching = true;
|
||||||
++mEquivalentCharCount;
|
++mEquivalentCharCount;
|
||||||
mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
|
mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
|
||||||
} else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
|
} else if (PROXIMITY_CHAR == matchedProximityCharId) {
|
||||||
mProximityMatching = true;
|
mProximityMatching = true;
|
||||||
++mProximityCount;
|
++mProximityCount;
|
||||||
mDistances[mOutputIndex] =
|
mDistances[mOutputIndex] =
|
||||||
|
@ -614,7 +614,7 @@ inline static bool isUpperCase(unsigned short c) {
|
||||||
multiplyIntCapped(matchWeight, &finalFreq);
|
multiplyIntCapped(matchWeight, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (proximityInfoState->getProximityType(0, word[0], true) == UNRELATED_CHAR) {
|
if (proximityInfoState->getProximityType(0, word[0], true) == SUBSTITUTION_CHAR) {
|
||||||
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
|
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -275,7 +275,7 @@ static inline void prof_out(void) {
|
||||||
#define NOT_A_CODE_POINT (-1)
|
#define NOT_A_CODE_POINT (-1)
|
||||||
#define NOT_A_DISTANCE (-1)
|
#define NOT_A_DISTANCE (-1)
|
||||||
#define NOT_A_COORDINATE (-1)
|
#define NOT_A_COORDINATE (-1)
|
||||||
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
|
#define MATCH_CHAR_WITHOUT_DISTANCE_INFO (-2)
|
||||||
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
|
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
|
||||||
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
|
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
|
||||||
#define NOT_AN_INDEX (-1)
|
#define NOT_AN_INDEX (-1)
|
||||||
|
@ -396,15 +396,15 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
|
||||||
// Used as a return value for character comparison
|
// Used as a return value for character comparison
|
||||||
typedef enum {
|
typedef enum {
|
||||||
// Same char, possibly with different case or accent
|
// Same char, possibly with different case or accent
|
||||||
EQUIVALENT_CHAR,
|
MATCH_CHAR,
|
||||||
// It is a char located nearby on the keyboard
|
// It is a char located nearby on the keyboard
|
||||||
NEAR_PROXIMITY_CHAR,
|
PROXIMITY_CHAR,
|
||||||
// It is an unrelated char and could be a substitution char
|
|
||||||
UNRELATED_CHAR,
|
|
||||||
// It is an unrelated char nor a substitution char
|
|
||||||
UNRELATED_NOR_SUBSTITUTION_CHAR,
|
|
||||||
// Additional proximity char which can differ by language.
|
// Additional proximity char which can differ by language.
|
||||||
ADDITIONAL_PROXIMITY_CHAR
|
ADDITIONAL_PROXIMITY_CHAR,
|
||||||
|
// It is a substitution char
|
||||||
|
SUBSTITUTION_CHAR,
|
||||||
|
// It is an unrelated char
|
||||||
|
UNRELATED_CHAR,
|
||||||
} ProximityType;
|
} ProximityType;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -415,15 +415,16 @@ typedef enum {
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
CT_MATCH,
|
CT_MATCH,
|
||||||
|
CT_PROXIMITY,
|
||||||
|
CT_ADDITIONAL_PROXIMITY,
|
||||||
|
CT_SUBSTITUTION,
|
||||||
CT_OMISSION,
|
CT_OMISSION,
|
||||||
CT_INSERTION,
|
CT_INSERTION,
|
||||||
CT_TRANSPOSITION,
|
CT_TRANSPOSITION,
|
||||||
CT_SUBSTITUTION,
|
|
||||||
CT_SPACE_SUBSTITUTION,
|
CT_SPACE_SUBSTITUTION,
|
||||||
CT_SPACE_OMISSION,
|
CT_SPACE_OMISSION,
|
||||||
CT_COMPLETION,
|
CT_COMPLETION,
|
||||||
CT_TERMINAL,
|
CT_TERMINAL,
|
||||||
CT_NEW_WORD,
|
CT_NEW_WORD,
|
||||||
CT_NEW_WORD_BIGRAM,
|
|
||||||
} CorrectionType;
|
} CorrectionType;
|
||||||
#endif // LATINIME_DEFINES_H
|
#endif // LATINIME_DEFINES_H
|
||||||
|
|
|
@ -197,15 +197,15 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
|
||||||
// The first char in the array is what user typed. If it matches right away, that means the
|
// The first char in the array is what user typed. If it matches right away, that means the
|
||||||
// user typed that same char for this pos.
|
// user typed that same char for this pos.
|
||||||
if (firstCodePoint == baseLowerC || firstCodePoint == codePoint) {
|
if (firstCodePoint == baseLowerC || firstCodePoint == codePoint) {
|
||||||
return EQUIVALENT_CHAR;
|
return MATCH_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!checkProximityChars) return UNRELATED_CHAR;
|
if (!checkProximityChars) return SUBSTITUTION_CHAR;
|
||||||
|
|
||||||
// If the non-accented, lowercased version of that first character matches c, then we have a
|
// If the non-accented, lowercased version of that first character matches c, then we have a
|
||||||
// non-accented version of the accented character the user typed. Treat it as a close char.
|
// non-accented version of the accented character the user typed. Treat it as a close char.
|
||||||
if (toBaseLowerCase(firstCodePoint) == baseLowerC) {
|
if (toBaseLowerCase(firstCodePoint) == baseLowerC) {
|
||||||
return NEAR_PROXIMITY_CHAR;
|
return PROXIMITY_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not an exact nor an accent-alike match: search the list of close keys
|
// Not an exact nor an accent-alike match: search the list of close keys
|
||||||
|
@ -218,7 +218,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
|
||||||
if (proximityIndex) {
|
if (proximityIndex) {
|
||||||
*proximityIndex = j;
|
*proximityIndex = j;
|
||||||
}
|
}
|
||||||
return NEAR_PROXIMITY_CHAR;
|
return PROXIMITY_CHAR;
|
||||||
}
|
}
|
||||||
++j;
|
++j;
|
||||||
}
|
}
|
||||||
|
@ -238,23 +238,23 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
|
||||||
++j;
|
++j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Was not included, signal this as an unrelated character.
|
// Was not included, signal this as a substitution character.
|
||||||
return UNRELATED_CHAR;
|
return SUBSTITUTION_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
ProximityType ProximityInfoState::getProximityTypeG(const int index, const int codePoint) const {
|
ProximityType ProximityInfoState::getProximityTypeG(const int index, const int codePoint) const {
|
||||||
if (!isUsed()) {
|
if (!isUsed()) {
|
||||||
return UNRELATED_NOR_SUBSTITUTION_CHAR;
|
return UNRELATED_CHAR;
|
||||||
}
|
}
|
||||||
const int lowerCodePoint = toLowerCase(codePoint);
|
const int lowerCodePoint = toLowerCase(codePoint);
|
||||||
const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
|
const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
|
||||||
for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
|
for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
|
||||||
if (mSampledSearchKeyVectors[index][i] == lowerCodePoint
|
if (mSampledSearchKeyVectors[index][i] == lowerCodePoint
|
||||||
|| mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) {
|
|| mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) {
|
||||||
return EQUIVALENT_CHAR;
|
return MATCH_CHAR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return UNRELATED_NOR_SUBSTITUTION_CHAR;
|
return UNRELATED_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const {
|
bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const {
|
||||||
|
|
|
@ -209,7 +209,7 @@ namespace latinime {
|
||||||
* ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
|
* ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
|
||||||
} else {
|
} else {
|
||||||
normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
|
normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
|
||||||
(j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO :
|
(j == 0) ? MATCH_CHAR_WITHOUT_DISTANCE_INFO :
|
||||||
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
|
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
|
||||||
}
|
}
|
||||||
if (DEBUG_PROXIMITY_CHARS) {
|
if (DEBUG_PROXIMITY_CHARS) {
|
||||||
|
|
Loading…
Reference in New Issue