Merge "Removed matchedChar count"
commit
f7c449b649
|
@ -102,7 +102,7 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
|
||||||
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
|
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
|
||||||
: (mInputLength == inputIndex + 1);
|
: (mInputLength == inputIndex + 1);
|
||||||
return Correction::RankingAlgorithm::calculateFinalFreq(
|
return Correction::RankingAlgorithm::calculateFinalFreq(
|
||||||
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
|
inputIndex, outputIndex, freq, sameLength, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Correction::initProcessState(const int outputIndex) {
|
bool Correction::initProcessState(const int outputIndex) {
|
||||||
|
@ -111,10 +111,9 @@ bool Correction::initProcessState(const int outputIndex) {
|
||||||
}
|
}
|
||||||
mOutputIndex = outputIndex;
|
mOutputIndex = outputIndex;
|
||||||
--(mCorrectionStates[outputIndex].mChildCount);
|
--(mCorrectionStates[outputIndex].mChildCount);
|
||||||
mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount;
|
|
||||||
mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
|
mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
|
||||||
mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
|
mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
|
||||||
mDiffs = mCorrectionStates[outputIndex].mDiffs;
|
mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
|
||||||
mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
|
mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
|
||||||
mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
|
mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
|
||||||
mSkipping = false;
|
mSkipping = false;
|
||||||
|
@ -130,10 +129,6 @@ int Correction::goDownTree(
|
||||||
return mOutputIndex;
|
return mOutputIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Correction::charMatched() {
|
|
||||||
++mMatchedCharCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
int Correction::getOutputIndex() {
|
int Correction::getOutputIndex() {
|
||||||
return mOutputIndex;
|
return mOutputIndex;
|
||||||
|
@ -158,10 +153,9 @@ void Correction::incrementOutputIndex() {
|
||||||
mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
|
mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
|
||||||
mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
|
mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
|
||||||
mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
|
mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
|
||||||
mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount;
|
|
||||||
mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
|
mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
|
||||||
mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
|
mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
|
||||||
mCorrectionStates[mOutputIndex].mDiffs = mDiffs;
|
mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
|
||||||
mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
|
mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
|
||||||
mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
|
mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
|
||||||
mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
|
mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
|
||||||
|
@ -174,7 +168,7 @@ void Correction::startToTraverseAllNodes() {
|
||||||
|
|
||||||
bool Correction::needsToPrune() const {
|
bool Correction::needsToPrune() const {
|
||||||
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|
||||||
|| mDiffs > mMaxEditDistance);
|
|| mProximityCount > mMaxEditDistance);
|
||||||
}
|
}
|
||||||
|
|
||||||
Correction::CorrectionType Correction::processSkipChar(
|
Correction::CorrectionType Correction::processSkipChar(
|
||||||
|
@ -231,8 +225,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
||||||
inputIndexForProximity, c, checkProximityChars);
|
inputIndexForProximity, c, checkProximityChars);
|
||||||
|
|
||||||
const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
|
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
||||||
if (unrelated) {
|
|
||||||
if (skip) {
|
if (skip) {
|
||||||
// Skip this letter and continue deeper
|
// Skip this letter and continue deeper
|
||||||
++mSkippedCount;
|
++mSkippedCount;
|
||||||
|
@ -240,19 +233,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
} else {
|
} else {
|
||||||
return UNRELATED;
|
return UNRELATED;
|
||||||
}
|
}
|
||||||
|
} else if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||||
|
// If inputIndex is greater than mInputLength, that means there is no
|
||||||
|
// proximity chars. So, we don't need to check proximity.
|
||||||
|
mMatching = true;
|
||||||
|
} else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
|
||||||
|
incrementProximityCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
mWord[mOutputIndex] = c;
|
mWord[mOutputIndex] = c;
|
||||||
// If inputIndex is greater than mInputLength, that means there is no
|
|
||||||
// proximity chars. So, we don't need to check proximity.
|
|
||||||
if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
|
||||||
mMatching = true;
|
|
||||||
charMatched();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
|
|
||||||
incrementDiffs();
|
|
||||||
}
|
|
||||||
|
|
||||||
const bool isSameAsUserTypedLength = mInputLength
|
const bool isSameAsUserTypedLength = mInputLength
|
||||||
== getInputIndex() + 1
|
== getInputIndex() + 1
|
||||||
|
@ -336,24 +325,25 @@ inline static void multiplyRate(const int rate, int *freq) {
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
int Correction::RankingAlgorithm::calculateFinalFreq(
|
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
|
||||||
const int inputIndex, const int outputIndex,
|
const int freq, const bool sameLength, const Correction* correction) {
|
||||||
const int matchCount, const int freq, const bool sameLength,
|
|
||||||
const Correction* correction) {
|
|
||||||
const int skipPos = correction->getSkipPos();
|
|
||||||
const int excessivePos = correction->getExcessivePos();
|
const int excessivePos = correction->getExcessivePos();
|
||||||
const int transposedPos = correction->getTransposedPos();
|
const int transposedPos = correction->getTransposedPos();
|
||||||
const int inputLength = correction->mInputLength;
|
const int inputLength = correction->mInputLength;
|
||||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
||||||
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
||||||
|
|
||||||
|
// TODO: use mExcessiveCount
|
||||||
|
const int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
|
||||||
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
||||||
|
|
||||||
const unsigned short* word = correction->mWord;
|
const unsigned short* word = correction->mWord;
|
||||||
const int skippedCount = correction->mSkippedCount;
|
const bool skipped = correction->mSkippedCount > 0;
|
||||||
|
|
||||||
// TODO: Demote by edit distance
|
// TODO: Demote by edit distance
|
||||||
int finalFreq = freq * matchWeight;
|
int finalFreq = freq * matchWeight;
|
||||||
if (skipPos >= 0) {
|
if (skipped) {
|
||||||
if (inputLength >= 2) {
|
if (inputLength >= 2) {
|
||||||
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
|
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
|
||||||
* (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
|
* (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
|
||||||
|
@ -387,10 +377,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
|
||||||
}
|
}
|
||||||
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
|
if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
|
||||||
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
||||||
}
|
}
|
||||||
} else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0
|
} else if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0
|
||||||
&& outputIndex > 0) {
|
&& outputIndex > 0) {
|
||||||
// A word with proximity corrections
|
// A word with proximity corrections
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
|
@ -418,7 +408,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
|
||||||
s ... skipping
|
s ... skipping
|
||||||
a ... traversing all
|
a ... traversing all
|
||||||
*/
|
*/
|
||||||
if (matchCount == inputLength && matchCount >= 2 && skippedCount == 0
|
if (matchCount == inputLength && matchCount >= 2 && !skipped
|
||||||
&& word[matchCount] == word[matchCount - 1]) {
|
&& word[matchCount] == word[matchCount - 1]) {
|
||||||
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,8 +48,6 @@ public:
|
||||||
void checkState();
|
void checkState();
|
||||||
bool initProcessState(const int index);
|
bool initProcessState(const int index);
|
||||||
|
|
||||||
void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
|
|
||||||
bool *traverseAllNodes, int *diffs);
|
|
||||||
int getOutputIndex();
|
int getOutputIndex();
|
||||||
int getInputIndex();
|
int getInputIndex();
|
||||||
|
|
||||||
|
@ -80,10 +78,6 @@ public:
|
||||||
|
|
||||||
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
||||||
|
|
||||||
int getDiffs() const {
|
|
||||||
return mDiffs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
// Tree helper methods
|
// Tree helper methods
|
||||||
int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
|
int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
|
||||||
|
@ -100,7 +94,6 @@ public:
|
||||||
return mCorrectionStates[index].mParentIndex;
|
return mCorrectionStates[index].mParentIndex;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
inline void charMatched();
|
|
||||||
inline void incrementInputIndex();
|
inline void incrementInputIndex();
|
||||||
inline void incrementOutputIndex();
|
inline void incrementOutputIndex();
|
||||||
inline bool needsToTraverseAllNodes();
|
inline bool needsToTraverseAllNodes();
|
||||||
|
@ -109,8 +102,8 @@ private:
|
||||||
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
|
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
inline void incrementDiffs() {
|
inline void incrementProximityCount() {
|
||||||
++mDiffs;
|
++mProximityCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int TYPED_LETTER_MULTIPLIER;
|
const int TYPED_LETTER_MULTIPLIER;
|
||||||
|
@ -133,8 +126,7 @@ private:
|
||||||
// The following member variables are being used as cache values of the correction state.
|
// The following member variables are being used as cache values of the correction state.
|
||||||
int mOutputIndex;
|
int mOutputIndex;
|
||||||
int mInputIndex;
|
int mInputIndex;
|
||||||
int mDiffs;
|
int mProximityCount;
|
||||||
int mMatchedCharCount;
|
|
||||||
int mSkippedCount;
|
int mSkippedCount;
|
||||||
int mSkipPos;
|
int mSkipPos;
|
||||||
bool mNeedsToTraverseAllNodes;
|
bool mNeedsToTraverseAllNodes;
|
||||||
|
@ -144,8 +136,7 @@ private:
|
||||||
class RankingAlgorithm {
|
class RankingAlgorithm {
|
||||||
public:
|
public:
|
||||||
static int calculateFinalFreq(const int inputIndex, const int depth,
|
static int calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int matchCount, const int freq, const bool sameLength,
|
const int freq, const bool sameLength, const Correction* correction);
|
||||||
const Correction* correction);
|
|
||||||
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
||||||
const Correction* correction);
|
const Correction* correction);
|
||||||
};
|
};
|
||||||
|
|
|
@ -28,8 +28,7 @@ struct CorrectionState {
|
||||||
int mSiblingPos;
|
int mSiblingPos;
|
||||||
uint16_t mChildCount;
|
uint16_t mChildCount;
|
||||||
uint8_t mInputIndex;
|
uint8_t mInputIndex;
|
||||||
uint8_t mDiffs;
|
uint8_t mProximityCount;
|
||||||
uint8_t mMatchedCount;
|
|
||||||
uint8_t mSkippedCount;
|
uint8_t mSkippedCount;
|
||||||
int8_t mSkipPos; // should be signed
|
int8_t mSkipPos; // should be signed
|
||||||
bool mMatching;
|
bool mMatching;
|
||||||
|
@ -43,9 +42,8 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
|
||||||
state->mParentIndex = -1;
|
state->mParentIndex = -1;
|
||||||
state->mChildCount = childCount;
|
state->mChildCount = childCount;
|
||||||
state->mInputIndex = 0;
|
state->mInputIndex = 0;
|
||||||
state->mDiffs = 0;
|
state->mProximityCount = 0;
|
||||||
state->mSiblingPos = rootPos;
|
state->mSiblingPos = rootPos;
|
||||||
state->mMatchedCount = 0;
|
|
||||||
state->mSkippedCount = 0;
|
state->mSkippedCount = 0;
|
||||||
state->mMatching = false;
|
state->mMatching = false;
|
||||||
state->mSkipping = false;
|
state->mSkipping = false;
|
||||||
|
|
Loading…
Reference in New Issue