Merge "Removed matchedChar count"
This commit is contained in:
commit
f7c449b649
3 changed files with 28 additions and 49 deletions
|
@ -102,7 +102,7 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
|
|||
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
|
||||
: (mInputLength == inputIndex + 1);
|
||||
return Correction::RankingAlgorithm::calculateFinalFreq(
|
||||
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
|
||||
inputIndex, outputIndex, freq, sameLength, this);
|
||||
}
|
||||
|
||||
bool Correction::initProcessState(const int outputIndex) {
|
||||
|
@ -111,10 +111,9 @@ bool Correction::initProcessState(const int outputIndex) {
|
|||
}
|
||||
mOutputIndex = outputIndex;
|
||||
--(mCorrectionStates[outputIndex].mChildCount);
|
||||
mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount;
|
||||
mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
|
||||
mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
|
||||
mDiffs = mCorrectionStates[outputIndex].mDiffs;
|
||||
mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
|
||||
mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
|
||||
mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
|
||||
mSkipping = false;
|
||||
|
@ -130,10 +129,6 @@ int Correction::goDownTree(
|
|||
return mOutputIndex;
|
||||
}
|
||||
|
||||
void Correction::charMatched() {
|
||||
++mMatchedCharCount;
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
int Correction::getOutputIndex() {
|
||||
return mOutputIndex;
|
||||
|
@ -158,10 +153,9 @@ void Correction::incrementOutputIndex() {
|
|||
mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
|
||||
mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
|
||||
mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
|
||||
mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount;
|
||||
mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
|
||||
mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
|
||||
mCorrectionStates[mOutputIndex].mDiffs = mDiffs;
|
||||
mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
|
||||
mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
|
||||
mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
|
||||
mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
|
||||
|
@ -174,7 +168,7 @@ void Correction::startToTraverseAllNodes() {
|
|||
|
||||
bool Correction::needsToPrune() const {
|
||||
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|
||||
|| mDiffs > mMaxEditDistance);
|
||||
|| mProximityCount > mMaxEditDistance);
|
||||
}
|
||||
|
||||
Correction::CorrectionType Correction::processSkipChar(
|
||||
|
@ -231,8 +225,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
|
||||
inputIndexForProximity, c, checkProximityChars);
|
||||
|
||||
const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
|
||||
if (unrelated) {
|
||||
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
|
||||
if (skip) {
|
||||
// Skip this letter and continue deeper
|
||||
++mSkippedCount;
|
||||
|
@ -240,19 +233,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
} else {
|
||||
return UNRELATED;
|
||||
}
|
||||
} else if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||
// If inputIndex is greater than mInputLength, that means there is no
|
||||
// proximity chars. So, we don't need to check proximity.
|
||||
mMatching = true;
|
||||
} else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
|
||||
incrementProximityCount();
|
||||
}
|
||||
|
||||
mWord[mOutputIndex] = c;
|
||||
// If inputIndex is greater than mInputLength, that means there is no
|
||||
// proximity chars. So, we don't need to check proximity.
|
||||
if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||
mMatching = true;
|
||||
charMatched();
|
||||
}
|
||||
|
||||
if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
|
||||
incrementDiffs();
|
||||
}
|
||||
|
||||
const bool isSameAsUserTypedLength = mInputLength
|
||||
== getInputIndex() + 1
|
||||
|
@ -336,24 +325,25 @@ inline static void multiplyRate(const int rate, int *freq) {
|
|||
//////////////////////
|
||||
|
||||
/* static */
|
||||
int Correction::RankingAlgorithm::calculateFinalFreq(
|
||||
const int inputIndex, const int outputIndex,
|
||||
const int matchCount, const int freq, const bool sameLength,
|
||||
const Correction* correction) {
|
||||
const int skipPos = correction->getSkipPos();
|
||||
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
|
||||
const int freq, const bool sameLength, const Correction* correction) {
|
||||
const int excessivePos = correction->getExcessivePos();
|
||||
const int transposedPos = correction->getTransposedPos();
|
||||
const int inputLength = correction->mInputLength;
|
||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
||||
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
||||
|
||||
// TODO: use mExcessiveCount
|
||||
const int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
|
||||
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
||||
|
||||
const unsigned short* word = correction->mWord;
|
||||
const int skippedCount = correction->mSkippedCount;
|
||||
const bool skipped = correction->mSkippedCount > 0;
|
||||
|
||||
// TODO: Demote by edit distance
|
||||
int finalFreq = freq * matchWeight;
|
||||
if (skipPos >= 0) {
|
||||
if (skipped) {
|
||||
if (inputLength >= 2) {
|
||||
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
|
||||
* (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
|
||||
|
@ -387,10 +377,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
|
|||
}
|
||||
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||
}
|
||||
if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
|
||||
if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
|
||||
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
||||
}
|
||||
} else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0
|
||||
} else if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0
|
||||
&& outputIndex > 0) {
|
||||
// A word with proximity corrections
|
||||
if (DEBUG_DICT) {
|
||||
|
@ -418,7 +408,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
|
|||
s ... skipping
|
||||
a ... traversing all
|
||||
*/
|
||||
if (matchCount == inputLength && matchCount >= 2 && skippedCount == 0
|
||||
if (matchCount == inputLength && matchCount >= 2 && !skipped
|
||||
&& word[matchCount] == word[matchCount - 1]) {
|
||||
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
|
||||
}
|
||||
|
|
|
@ -48,8 +48,6 @@ public:
|
|||
void checkState();
|
||||
bool initProcessState(const int index);
|
||||
|
||||
void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
|
||||
bool *traverseAllNodes, int *diffs);
|
||||
int getOutputIndex();
|
||||
int getInputIndex();
|
||||
|
||||
|
@ -80,10 +78,6 @@ public:
|
|||
|
||||
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
||||
|
||||
int getDiffs() const {
|
||||
return mDiffs;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Tree helper methods
|
||||
int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
|
||||
|
@ -100,7 +94,6 @@ public:
|
|||
return mCorrectionStates[index].mParentIndex;
|
||||
}
|
||||
private:
|
||||
inline void charMatched();
|
||||
inline void incrementInputIndex();
|
||||
inline void incrementOutputIndex();
|
||||
inline bool needsToTraverseAllNodes();
|
||||
|
@ -109,8 +102,8 @@ private:
|
|||
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
|
||||
|
||||
// TODO: remove
|
||||
inline void incrementDiffs() {
|
||||
++mDiffs;
|
||||
inline void incrementProximityCount() {
|
||||
++mProximityCount;
|
||||
}
|
||||
|
||||
const int TYPED_LETTER_MULTIPLIER;
|
||||
|
@ -133,8 +126,7 @@ private:
|
|||
// The following member variables are being used as cache values of the correction state.
|
||||
int mOutputIndex;
|
||||
int mInputIndex;
|
||||
int mDiffs;
|
||||
int mMatchedCharCount;
|
||||
int mProximityCount;
|
||||
int mSkippedCount;
|
||||
int mSkipPos;
|
||||
bool mNeedsToTraverseAllNodes;
|
||||
|
@ -144,8 +136,7 @@ private:
|
|||
class RankingAlgorithm {
|
||||
public:
|
||||
static int calculateFinalFreq(const int inputIndex, const int depth,
|
||||
const int matchCount, const int freq, const bool sameLength,
|
||||
const Correction* correction);
|
||||
const int freq, const bool sameLength, const Correction* correction);
|
||||
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
||||
const Correction* correction);
|
||||
};
|
||||
|
|
|
@ -28,8 +28,7 @@ struct CorrectionState {
|
|||
int mSiblingPos;
|
||||
uint16_t mChildCount;
|
||||
uint8_t mInputIndex;
|
||||
uint8_t mDiffs;
|
||||
uint8_t mMatchedCount;
|
||||
uint8_t mProximityCount;
|
||||
uint8_t mSkippedCount;
|
||||
int8_t mSkipPos; // should be signed
|
||||
bool mMatching;
|
||||
|
@ -43,9 +42,8 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
|
|||
state->mParentIndex = -1;
|
||||
state->mChildCount = childCount;
|
||||
state->mInputIndex = 0;
|
||||
state->mDiffs = 0;
|
||||
state->mProximityCount = 0;
|
||||
state->mSiblingPos = rootPos;
|
||||
state->mMatchedCount = 0;
|
||||
state->mSkippedCount = 0;
|
||||
state->mMatching = false;
|
||||
state->mSkipping = false;
|
||||
|
|
Loading…
Reference in a new issue