Remove duplicate new-word bigram cost (part 1)
Removes a duplicate application of the new word bigram cost and updates only the related parameters (those associated with multi-word suggestions). Note: test results will improve after full optimization. [Category diff] +1 357 -1 485 +2 16 -2 20 +3 20 -3 16 +4 198 -4 226 +5 510 -5 443 +6 518 -6 368 +7 394 -7 455 [Weighted category diff] +1 482 -1 532 +2 22 -2 22 +3 22 -3 22 +4 233 -4 381 +5 578 -5 500 +6 617 -6 498 +7 522 -7 521 Bug: 8633962 Change-Id: I3c3ecc9460e8e03e44925e11b2d4b037a6c3b99e
This commit is contained in:
parent
7a1721753b
commit
90cb956c4f
4 changed files with 6 additions and 31 deletions
|
@@ -360,11 +360,6 @@ class DicNode {
|
|||
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
|
||||
}
|
||||
|
||||
// Note that "cost" means delta for "distance" that is weighted.
|
||||
float getTotalPrevWordsLanguageCost() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getTotalPrevWordsLanguageCost();
|
||||
}
|
||||
|
||||
// Used to commit input partially
|
||||
int getPrevWordNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||
|
|
|
@@ -31,7 +31,7 @@ class DicNodeStateScoring {
|
|||
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
|
||||
mEditCorrectionCount(0), mProximityCorrectionCount(0),
|
||||
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
|
||||
mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
|
||||
mRawLength(0.0f) {
|
||||
}
|
||||
|
||||
virtual ~DicNodeStateScoring() {}
|
||||
|
@@ -42,7 +42,6 @@ class DicNodeStateScoring {
|
|||
mNormalizedCompoundDistance = 0.0f;
|
||||
mSpatialDistance = 0.0f;
|
||||
mLanguageDistance = 0.0f;
|
||||
mTotalPrevWordsLanguageCost = 0.0f;
|
||||
mRawLength = 0.0f;
|
||||
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
|
||||
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
|
||||
|
@@ -54,7 +53,6 @@ class DicNodeStateScoring {
|
|||
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
|
||||
mSpatialDistance = scoring->mSpatialDistance;
|
||||
mLanguageDistance = scoring->mLanguageDistance;
|
||||
mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
|
||||
mRawLength = scoring->mRawLength;
|
||||
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
|
||||
mDigraphIndex = scoring->mDigraphIndex;
|
||||
|
@@ -70,9 +68,6 @@ class DicNodeStateScoring {
|
|||
if (isProximityCorrection) {
|
||||
++mProximityCorrectionCount;
|
||||
}
|
||||
if (languageCost > 0.0f) {
|
||||
setTotalPrevWordsLanguageCost(mTotalPrevWordsLanguageCost + languageCost);
|
||||
}
|
||||
}
|
||||
|
||||
void addRawLength(const float rawLength) {
|
||||
|
@@ -148,10 +143,6 @@ class DicNodeStateScoring {
|
|||
}
|
||||
}
|
||||
|
||||
float getTotalPrevWordsLanguageCost() const {
|
||||
return mTotalPrevWordsLanguageCost;
|
||||
}
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
|
@@ -165,7 +156,6 @@ class DicNodeStateScoring {
|
|||
float mNormalizedCompoundDistance;
|
||||
float mSpatialDistance;
|
||||
float mLanguageDistance;
|
||||
float mTotalPrevWordsLanguageCost;
|
||||
float mRawLength;
|
||||
|
||||
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
|
||||
|
@@ -179,11 +169,6 @@ class DicNodeStateScoring {
|
|||
/ static_cast<float>(max(1, totalInputIndex));
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: remove
|
||||
AK_FORCE_INLINE void setTotalPrevWordsLanguageCost(float totalPrevWordsLanguageCost) {
|
||||
mTotalPrevWordsLanguageCost = totalPrevWordsLanguageCost;
|
||||
}
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_SCORING_H
|
||||
|
|
|
@@ -35,17 +35,17 @@ const float ScoringParams::INSERTION_COST = 0.670f;
|
|||
const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.526f;
|
||||
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.563f;
|
||||
const float ScoringParams::TRANSPOSITION_COST = 0.494f;
|
||||
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.239f;
|
||||
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.289f;
|
||||
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
|
||||
const float ScoringParams::SUBSTITUTION_COST = 0.363f;
|
||||
const float ScoringParams::COST_NEW_WORD = 0.054f;
|
||||
const float ScoringParams::COST_NEW_WORD = 0.024f;
|
||||
const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f;
|
||||
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
|
||||
const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.462f;
|
||||
const float ScoringParams::COST_LOOKAHEAD = 0.092f;
|
||||
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.126f;
|
||||
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.056f;
|
||||
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.136f;
|
||||
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.536f;
|
||||
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
|
||||
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
|
||||
const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f;
|
||||
|
|
|
@@ -140,7 +140,7 @@ class TypingWeighting : public Weighting {
|
|||
const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
|
||||
hash_map_compat<int, int16_t> *const bigramCacheMap) const {
|
||||
return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(),
|
||||
dicNode, bigramCacheMap);
|
||||
dicNode, bigramCacheMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
||||
}
|
||||
|
||||
float getCompletionCost(const DicTraverseSession *const traverseSession,
|
||||
|
@@ -164,13 +164,8 @@ class TypingWeighting : public Weighting {
|
|||
// because the input word shouldn't be treated as perfect
|
||||
const bool isExactMatch = !hasEditCount && !hasMultipleWords
|
||||
&& !hasProximityErrors && isSameLength;
|
||||
|
||||
const float totalPrevWordsLanguageCost = dicNode->getTotalPrevWordsLanguageCost();
|
||||
const float languageImprobability = isExactMatch ? 0.0f : dicNodeLanguageImprobability;
|
||||
const float languageWeight = ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
||||
// TODO: Caveat: The following equation should be:
|
||||
// totalPrevWordsLanguageCost + (languageImprobability * languageWeight);
|
||||
return (totalPrevWordsLanguageCost + languageImprobability) * languageWeight;
|
||||
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
|
||||
|
|
Loading…
Reference in a new issue