From 90cb956c4fd6b9aa2f5c6689448485e87061da50 Mon Sep 17 00:00:00 2001 From: Tom Ouyang Date: Tue, 16 Apr 2013 16:51:55 -0700 Subject: [PATCH] Remove duplicate new-word bigram cost (part 1) Removes a duplicate application of the new word bigram cost and updates only the related parameters (those associated with multi-word suggestions). Note: test results will improve after full optimization. [Category diff] +1 357 -1 485 +2 16 -2 20 +3 20 -3 16 +4 198 -4 226 +5 510 -5 443 +6 518 -6 368 +7 394 -7 455 [Weighted category diff] +1 482 -1 532 +2 22 -2 22 +3 22 -3 22 +4 233 -4 381 +5 578 -5 500 +6 617 -6 498 +7 522 -7 521 Bug: 8633962 Change-Id: I3c3ecc9460e8e03e44925e11b2d4b037a6c3b99e --- native/jni/src/suggest/core/dicnode/dic_node.h | 5 ----- .../core/dicnode/dic_node_state_scoring.h | 17 +---------------- .../policyimpl/typing/scoring_params.cpp | 6 +++--- .../policyimpl/typing/typing_weighting.h | 9 ++------- 4 files changed, 6 insertions(+), 31 deletions(-) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 32faae52c..f8d2df452 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -360,11 +360,6 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight); } - // Note that "cost" means delta for "distance" that is weighted. - float getTotalPrevWordsLanguageCost() const { - return mDicNodeState.mDicNodeStateScoring.getTotalPrevWordsLanguageCost(); - } - // Used to commit input partially int getPrevWordNodePos() const { return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); diff --git a/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h index 8902d3122..fd9d610e3 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h @@ -31,7 +31,7 @@ class DicNodeStateScoring { mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX), mEditCorrectionCount(0), mProximityCorrectionCount(0), mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f), - mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) { + mRawLength(0.0f) { } virtual ~DicNodeStateScoring() {} @@ -42,7 +42,6 @@ class DicNodeStateScoring { mNormalizedCompoundDistance = 0.0f; mSpatialDistance = 0.0f; mLanguageDistance = 0.0f; - mTotalPrevWordsLanguageCost = 0.0f; mRawLength = 0.0f; mDoubleLetterLevel = NOT_A_DOUBLE_LETTER; mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX; @@ -54,7 +53,6 @@ class DicNodeStateScoring { mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance; mSpatialDistance = scoring->mSpatialDistance; mLanguageDistance = scoring->mLanguageDistance; - mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost; mRawLength = scoring->mRawLength; mDoubleLetterLevel = scoring->mDoubleLetterLevel; mDigraphIndex = scoring->mDigraphIndex; @@ -70,9 +68,6 @@ class DicNodeStateScoring { if (isProximityCorrection) { ++mProximityCorrectionCount; } - if (languageCost > 0.0f) { - setTotalPrevWordsLanguageCost(mTotalPrevWordsLanguageCost + languageCost); - } } void addRawLength(const float rawLength) { @@ -148,10 +143,6 @@ class DicNodeStateScoring { } } - float getTotalPrevWordsLanguageCost() const { - return mTotalPrevWordsLanguageCost; - } - private: // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok @@ -165,7 +156,6 @@ class DicNodeStateScoring { float mNormalizedCompoundDistance; float mSpatialDistance; float mLanguageDistance; - float mTotalPrevWordsLanguageCost; float mRawLength; AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance, @@ -179,11 +169,6 @@ class DicNodeStateScoring { / static_cast(max(1, totalInputIndex)); } } - - //TODO: remove - AK_FORCE_INLINE void setTotalPrevWordsLanguageCost(float totalPrevWordsLanguageCost) { - mTotalPrevWordsLanguageCost = totalPrevWordsLanguageCost; - } }; } // namespace latinime #endif // LATINIME_DIC_NODE_STATE_SCORING_H diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index 0fa684f01..11ccf1773 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -35,17 +35,17 @@ const float ScoringParams::INSERTION_COST = 0.670f; const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.526f; const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.563f; const float ScoringParams::TRANSPOSITION_COST = 0.494f; -const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.239f; +const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.289f; const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f; const float ScoringParams::SUBSTITUTION_COST = 0.363f; -const float ScoringParams::COST_NEW_WORD = 0.054f; +const float ScoringParams::COST_NEW_WORD = 0.024f; const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f; const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f; const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.462f; const float ScoringParams::COST_LOOKAHEAD = 0.092f; const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.126f; const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.056f; -const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.136f; +const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.536f; const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f; const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f; const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 74e4e34e4..34d25ae1a 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -140,7 +140,7 @@ class TypingWeighting : public Weighting { const DicTraverseSession *const traverseSession, const DicNode *const dicNode, hash_map_compat *const bigramCacheMap) const { return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(), - dicNode, bigramCacheMap); + dicNode, bigramCacheMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } float getCompletionCost(const DicTraverseSession *const traverseSession, @@ -164,13 +164,8 @@ class TypingWeighting : public Weighting { // because the input word shouldn't be treated as perfect const bool isExactMatch = !hasEditCount && !hasMultipleWords && !hasProximityErrors && isSameLength; - - const float totalPrevWordsLanguageCost = dicNode->getTotalPrevWordsLanguageCost(); const float languageImprobability = isExactMatch ? 0.0f : dicNodeLanguageImprobability; - const float languageWeight = ScoringParams::DISTANCE_WEIGHT_LANGUAGE; - // TODO: Caveat: The following equation should be: - // totalPrevWordsLanguageCost + (languageImprobability * languageWeight); - return (totalPrevWordsLanguageCost + languageImprobability) * languageWeight; + return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {