Merge "Removed matchedChar count"

2011-08-11 07:04:40 -07:00 · 2011-08-11 07:04:40 -07:00 · f7c449b649
commit f7c449b649
parent 4ef448ac82 466ed22fc6
3 changed files with 28 additions and 49 deletions
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -102,7 +102,7 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
    const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
            : (mInputLength == inputIndex + 1);
    return Correction::RankingAlgorithm::calculateFinalFreq(
-            inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
+            inputIndex, outputIndex, freq, sameLength, this);
 }

 bool Correction::initProcessState(const int outputIndex) {
@@ -111,10 +111,9 @@ bool Correction::initProcessState(const int outputIndex) {
    }
    mOutputIndex = outputIndex;
    --(mCorrectionStates[outputIndex].mChildCount);
-    mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount;
    mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
    mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
-    mDiffs = mCorrectionStates[outputIndex].mDiffs;
+    mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
    mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
    mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
    mSkipping = false;
@@ -130,10 +129,6 @@ int Correction::goDownTree(
    return mOutputIndex;
 }

-void Correction::charMatched() {
-    ++mMatchedCharCount;
-}
-
 // TODO: remove
 int Correction::getOutputIndex() {
    return mOutputIndex;
@@ -158,10 +153,9 @@ void Correction::incrementOutputIndex() {
    mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
    mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
    mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
-    mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount;
    mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
    mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
-    mCorrectionStates[mOutputIndex].mDiffs = mDiffs;
+    mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
    mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
    mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
    mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
@@ -174,7 +168,7 @@ void Correction::startToTraverseAllNodes() {

 bool Correction::needsToPrune() const {
    return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
-            || mDiffs > mMaxEditDistance);
+            || mProximityCount > mMaxEditDistance);
 }

 Correction::CorrectionType Correction::processSkipChar(
@@ -231,8 +225,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
                inputIndexForProximity, c, checkProximityChars);

-        const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
-        if (unrelated) {
+        if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
            if (skip) {
                // Skip this letter and continue deeper
                ++mSkippedCount;
@@ -240,19 +233,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
            } else {
                return UNRELATED;
            }
+        } else if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+            // If inputIndex is greater than mInputLength, that means there is no
+            // proximity chars. So, we don't need to check proximity.
+            mMatching = true;
+        } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+            incrementProximityCount();
        }

        mWord[mOutputIndex] = c;
-        // If inputIndex is greater than mInputLength, that means there is no
-        // proximity chars. So, we don't need to check proximity.
-        if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
-            mMatching = true;
-            charMatched();
-        }
-
-        if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
-            incrementDiffs();
-        }

        const bool isSameAsUserTypedLength = mInputLength
                == getInputIndex() + 1
@@ -336,24 +325,25 @@ inline static void multiplyRate(const int rate, int *freq) {
 //////////////////////

 /* static */
-int Correction::RankingAlgorithm::calculateFinalFreq(
-        const int inputIndex, const int outputIndex,
-        const int matchCount, const int freq, const bool sameLength,
-        const Correction* correction) {
-    const int skipPos = correction->getSkipPos();
+int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
+        const int freq, const bool sameLength, const Correction* correction) {
    const int excessivePos = correction->getExcessivePos();
    const int transposedPos = correction->getTransposedPos();
    const int inputLength = correction->mInputLength;
    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
    const ProximityInfo *proximityInfo = correction->mProximityInfo;
+
+    // TODO: use mExcessiveCount
+    const int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
    const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
+
    const unsigned short* word = correction->mWord;
-    const int skippedCount = correction->mSkippedCount;
+    const bool skipped = correction->mSkippedCount > 0;

    // TODO: Demote by edit distance
    int finalFreq = freq * matchWeight;
-    if (skipPos >= 0) {
+    if (skipped) {
        if (inputLength >= 2) {
            const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
                    * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
@@ -387,10 +377,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
            }
            multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
        }
-        if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
+        if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
            finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
        }
-    } else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0
+    } else if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0
            && outputIndex > 0) {
        // A word with proximity corrections
        if (DEBUG_DICT) {
@@ -418,7 +408,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
     s ... skipping
     a ... traversing all
     */
-    if (matchCount == inputLength && matchCount >= 2 && skippedCount == 0
+    if (matchCount == inputLength && matchCount >= 2 && !skipped
            && word[matchCount] == word[matchCount - 1]) {
        multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
    }
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -48,8 +48,6 @@ public:
    void checkState();
    bool initProcessState(const int index);

-    void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
-            bool *traverseAllNodes, int *diffs);
    int getOutputIndex();
    int getInputIndex();

@@ -80,10 +78,6 @@ public:

    CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);

-    int getDiffs() const {
-        return mDiffs;
-    }
-
    /////////////////////////
    // Tree helper methods
    int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
@@ -100,7 +94,6 @@ public:
        return mCorrectionStates[index].mParentIndex;
    }
 private:
-    inline void charMatched();
    inline void incrementInputIndex();
    inline void incrementOutputIndex();
    inline bool needsToTraverseAllNodes();
@@ -109,8 +102,8 @@ private:
    inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);

    // TODO: remove
-    inline void incrementDiffs() {
-        ++mDiffs;
+    inline void incrementProximityCount() {
+        ++mProximityCount;
    }

    const int TYPED_LETTER_MULTIPLIER;
@@ -133,8 +126,7 @@ private:
    // The following member variables are being used as cache values of the correction state.
    int mOutputIndex;
    int mInputIndex;
-    int mDiffs;
-    int mMatchedCharCount;
+    int mProximityCount;
    int mSkippedCount;
    int mSkipPos;
    bool mNeedsToTraverseAllNodes;
@@ -144,8 +136,7 @@ private:
    class RankingAlgorithm {
    public:
        static int calculateFinalFreq(const int inputIndex, const int depth,
-                const int matchCount, const int freq, const bool sameLength,
-                const Correction* correction);
+                const int freq, const bool sameLength, const Correction* correction);
        static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
                const Correction* correction);
    };
--- a/native/src/correction_state.h
+++ b/native/src/correction_state.h
@@ -28,8 +28,7 @@ struct CorrectionState {
    int mSiblingPos;
    uint16_t mChildCount;
    uint8_t mInputIndex;
-    uint8_t mDiffs;
-    uint8_t mMatchedCount;
+    uint8_t mProximityCount;
    uint8_t mSkippedCount;
    int8_t mSkipPos; // should be signed
    bool mMatching;
@@ -43,9 +42,8 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
    state->mParentIndex = -1;
    state->mChildCount = childCount;
    state->mInputIndex = 0;
-    state->mDiffs = 0;
+    state->mProximityCount = 0;
    state->mSiblingPos = rootPos;
-    state->mMatchedCount = 0;
    state->mSkippedCount = 0;
    state->mMatching = false;
    state->mSkipping = false;