Remove duplicate new-word bigram cost (part 1)
Removes a duplicate application of the new-word bigram cost and updates only the related parameters (those associated with multi-word suggestions).
Note: test results will improve after full optimization.

[Category diff]
+1 357 -1 485 +2 16 -2 20 +3 20 -3 16 +4 198 -4 226 +5 510 -5 443 +6 518 -6 368 +7 394 -7 455

[Weighted category diff]
+1 482 -1 532 +2 22 -2 22 +3 22 -3 22 +4 233 -4 381 +5 578 -5 500 +6 617 -6 498 +7 522 -7 521

Bug: 8633962
Change-Id: I3c3ecc9460e8e03e44925e11b2d4b037a6c3b99e
main
parent
7a1721753b
commit
90cb956c4f
|
@ -360,11 +360,6 @@ class DicNode {
|
||||||
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
|
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note that "cost" means delta for "distance" that is weighted.
|
|
||||||
float getTotalPrevWordsLanguageCost() const {
|
|
||||||
return mDicNodeState.mDicNodeStateScoring.getTotalPrevWordsLanguageCost();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used to commit input partially
|
// Used to commit input partially
|
||||||
int getPrevWordNodePos() const {
|
int getPrevWordNodePos() const {
|
||||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||||
|
|
|
@ -31,7 +31,7 @@ class DicNodeStateScoring {
|
||||||
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
|
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
|
||||||
mEditCorrectionCount(0), mProximityCorrectionCount(0),
|
mEditCorrectionCount(0), mProximityCorrectionCount(0),
|
||||||
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
|
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
|
||||||
mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
|
mRawLength(0.0f) {
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~DicNodeStateScoring() {}
|
virtual ~DicNodeStateScoring() {}
|
||||||
|
@ -42,7 +42,6 @@ class DicNodeStateScoring {
|
||||||
mNormalizedCompoundDistance = 0.0f;
|
mNormalizedCompoundDistance = 0.0f;
|
||||||
mSpatialDistance = 0.0f;
|
mSpatialDistance = 0.0f;
|
||||||
mLanguageDistance = 0.0f;
|
mLanguageDistance = 0.0f;
|
||||||
mTotalPrevWordsLanguageCost = 0.0f;
|
|
||||||
mRawLength = 0.0f;
|
mRawLength = 0.0f;
|
||||||
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
|
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
|
||||||
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
|
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
|
||||||
|
@ -54,7 +53,6 @@ class DicNodeStateScoring {
|
||||||
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
|
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
|
||||||
mSpatialDistance = scoring->mSpatialDistance;
|
mSpatialDistance = scoring->mSpatialDistance;
|
||||||
mLanguageDistance = scoring->mLanguageDistance;
|
mLanguageDistance = scoring->mLanguageDistance;
|
||||||
mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
|
|
||||||
mRawLength = scoring->mRawLength;
|
mRawLength = scoring->mRawLength;
|
||||||
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
|
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
|
||||||
mDigraphIndex = scoring->mDigraphIndex;
|
mDigraphIndex = scoring->mDigraphIndex;
|
||||||
|
@ -70,9 +68,6 @@ class DicNodeStateScoring {
|
||||||
if (isProximityCorrection) {
|
if (isProximityCorrection) {
|
||||||
++mProximityCorrectionCount;
|
++mProximityCorrectionCount;
|
||||||
}
|
}
|
||||||
if (languageCost > 0.0f) {
|
|
||||||
setTotalPrevWordsLanguageCost(mTotalPrevWordsLanguageCost + languageCost);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void addRawLength(const float rawLength) {
|
void addRawLength(const float rawLength) {
|
||||||
|
@ -148,10 +143,6 @@ class DicNodeStateScoring {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float getTotalPrevWordsLanguageCost() const {
|
|
||||||
return mTotalPrevWordsLanguageCost;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Caution!!!
|
// Caution!!!
|
||||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
@ -165,7 +156,6 @@ class DicNodeStateScoring {
|
||||||
float mNormalizedCompoundDistance;
|
float mNormalizedCompoundDistance;
|
||||||
float mSpatialDistance;
|
float mSpatialDistance;
|
||||||
float mLanguageDistance;
|
float mLanguageDistance;
|
||||||
float mTotalPrevWordsLanguageCost;
|
|
||||||
float mRawLength;
|
float mRawLength;
|
||||||
|
|
||||||
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
|
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
|
||||||
|
@ -179,11 +169,6 @@ class DicNodeStateScoring {
|
||||||
/ static_cast<float>(max(1, totalInputIndex));
|
/ static_cast<float>(max(1, totalInputIndex));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: remove
|
|
||||||
AK_FORCE_INLINE void setTotalPrevWordsLanguageCost(float totalPrevWordsLanguageCost) {
|
|
||||||
mTotalPrevWordsLanguageCost = totalPrevWordsLanguageCost;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DIC_NODE_STATE_SCORING_H
|
#endif // LATINIME_DIC_NODE_STATE_SCORING_H
|
||||||
|
|
|
@ -35,17 +35,17 @@ const float ScoringParams::INSERTION_COST = 0.670f;
|
||||||
const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.526f;
|
const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.526f;
|
||||||
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.563f;
|
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.563f;
|
||||||
const float ScoringParams::TRANSPOSITION_COST = 0.494f;
|
const float ScoringParams::TRANSPOSITION_COST = 0.494f;
|
||||||
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.239f;
|
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.289f;
|
||||||
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
|
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
|
||||||
const float ScoringParams::SUBSTITUTION_COST = 0.363f;
|
const float ScoringParams::SUBSTITUTION_COST = 0.363f;
|
||||||
const float ScoringParams::COST_NEW_WORD = 0.054f;
|
const float ScoringParams::COST_NEW_WORD = 0.024f;
|
||||||
const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f;
|
const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f;
|
||||||
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
|
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
|
||||||
const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.462f;
|
const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.462f;
|
||||||
const float ScoringParams::COST_LOOKAHEAD = 0.092f;
|
const float ScoringParams::COST_LOOKAHEAD = 0.092f;
|
||||||
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.126f;
|
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.126f;
|
||||||
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.056f;
|
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.056f;
|
||||||
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.136f;
|
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.536f;
|
||||||
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
|
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
|
||||||
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
|
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
|
||||||
const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f;
|
const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f;
|
||||||
|
|
|
@ -140,7 +140,7 @@ class TypingWeighting : public Weighting {
|
||||||
const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
|
const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
|
||||||
hash_map_compat<int, int16_t> *const bigramCacheMap) const {
|
hash_map_compat<int, int16_t> *const bigramCacheMap) const {
|
||||||
return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(),
|
return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(),
|
||||||
dicNode, bigramCacheMap);
|
dicNode, bigramCacheMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
||||||
}
|
}
|
||||||
|
|
||||||
float getCompletionCost(const DicTraverseSession *const traverseSession,
|
float getCompletionCost(const DicTraverseSession *const traverseSession,
|
||||||
|
@ -164,13 +164,8 @@ class TypingWeighting : public Weighting {
|
||||||
// because the input word shouldn't be treated as perfect
|
// because the input word shouldn't be treated as perfect
|
||||||
const bool isExactMatch = !hasEditCount && !hasMultipleWords
|
const bool isExactMatch = !hasEditCount && !hasMultipleWords
|
||||||
&& !hasProximityErrors && isSameLength;
|
&& !hasProximityErrors && isSameLength;
|
||||||
|
|
||||||
const float totalPrevWordsLanguageCost = dicNode->getTotalPrevWordsLanguageCost();
|
|
||||||
const float languageImprobability = isExactMatch ? 0.0f : dicNodeLanguageImprobability;
|
const float languageImprobability = isExactMatch ? 0.0f : dicNodeLanguageImprobability;
|
||||||
const float languageWeight = ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
||||||
// TODO: Caveat: The following equation should be:
|
|
||||||
// totalPrevWordsLanguageCost + (languageImprobability * languageWeight);
|
|
||||||
return (totalPrevWordsLanguageCost + languageImprobability) * languageWeight;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
|
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
|
||||||
|
|
Loading…
Reference in New Issue