Remove duplicate new-word bigram cost (part 1)

Removes a duplicate application of the new word bigram cost and updates only
the related parameters (those associated with multi-word suggestions).

Note: test results will improve after full optimization.

[Category diff]
+1     357
-1     485
+2      16
-2      20
+3      20
-3      16
+4     198
-4     226
+5     510
-5     443
+6     518
-6     368
+7     394
-7     455

[Weighted category diff]
+1     482
-1     532
+2      22
-2      22
+3      22
-3      22
+4     233
-4     381
+5     578
-5     500
+6     617
-6     498
+7     522
-7     521

Bug: 8633962
Change-Id: I3c3ecc9460e8e03e44925e11b2d4b037a6c3b99e
This commit is contained in:
Tom Ouyang 2013-04-16 16:51:55 -07:00
parent 7a1721753b
commit 90cb956c4f
4 changed files with 6 additions and 31 deletions

View file

@ -360,11 +360,6 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
}
// Note that "cost" means delta for "distance" that is weighted.
float getTotalPrevWordsLanguageCost() const {
return mDicNodeState.mDicNodeStateScoring.getTotalPrevWordsLanguageCost();
}
// Used to commit input partially
int getPrevWordNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();

View file

@ -31,7 +31,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
mRawLength(0.0f) {
}
virtual ~DicNodeStateScoring() {}
@ -42,7 +42,6 @@ class DicNodeStateScoring {
mNormalizedCompoundDistance = 0.0f;
mSpatialDistance = 0.0f;
mLanguageDistance = 0.0f;
mTotalPrevWordsLanguageCost = 0.0f;
mRawLength = 0.0f;
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
@ -54,7 +53,6 @@ class DicNodeStateScoring {
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
mSpatialDistance = scoring->mSpatialDistance;
mLanguageDistance = scoring->mLanguageDistance;
mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
@ -70,9 +68,6 @@ class DicNodeStateScoring {
if (isProximityCorrection) {
++mProximityCorrectionCount;
}
if (languageCost > 0.0f) {
setTotalPrevWordsLanguageCost(mTotalPrevWordsLanguageCost + languageCost);
}
}
void addRawLength(const float rawLength) {
@ -148,10 +143,6 @@ class DicNodeStateScoring {
}
}
float getTotalPrevWordsLanguageCost() const {
return mTotalPrevWordsLanguageCost;
}
private:
// Caution!!!
// Use the default copy constructor and assignment operator because shallow copies are ok
@ -165,7 +156,6 @@ class DicNodeStateScoring {
float mNormalizedCompoundDistance;
float mSpatialDistance;
float mLanguageDistance;
float mTotalPrevWordsLanguageCost;
float mRawLength;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
@ -179,11 +169,6 @@ class DicNodeStateScoring {
/ static_cast<float>(max(1, totalInputIndex));
}
}
//TODO: remove
AK_FORCE_INLINE void setTotalPrevWordsLanguageCost(float totalPrevWordsLanguageCost) {
mTotalPrevWordsLanguageCost = totalPrevWordsLanguageCost;
}
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_SCORING_H

View file

@ -35,17 +35,17 @@ const float ScoringParams::INSERTION_COST = 0.670f;
const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.526f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.563f;
const float ScoringParams::TRANSPOSITION_COST = 0.494f;
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.239f;
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.289f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
const float ScoringParams::SUBSTITUTION_COST = 0.363f;
const float ScoringParams::COST_NEW_WORD = 0.054f;
const float ScoringParams::COST_NEW_WORD = 0.024f;
const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.462f;
const float ScoringParams::COST_LOOKAHEAD = 0.092f;
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.126f;
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.056f;
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.136f;
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.536f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f;

View file

@ -140,7 +140,7 @@ class TypingWeighting : public Weighting {
const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
hash_map_compat<int, int16_t> *const bigramCacheMap) const {
return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(),
dicNode, bigramCacheMap);
dicNode, bigramCacheMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}
float getCompletionCost(const DicTraverseSession *const traverseSession,
@ -164,13 +164,8 @@ class TypingWeighting : public Weighting {
// because the input word shouldn't be treated as perfect
const bool isExactMatch = !hasEditCount && !hasMultipleWords
&& !hasProximityErrors && isSameLength;
const float totalPrevWordsLanguageCost = dicNode->getTotalPrevWordsLanguageCost();
const float languageImprobability = isExactMatch ? 0.0f : dicNodeLanguageImprobability;
const float languageWeight = ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
// TODO: Caveat: The following equation should be:
// totalPrevWordsLanguageCost + (languageImprobability * languageWeight);
return (totalPrevWordsLanguageCost + languageImprobability) * languageWeight;
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {