Improve space substitution error correction.
Bug: 17432052 [Category diff] +1 262 -1 93 +2 2 -2 18 +3 18 -3 2 +4 111 -4 148 +5 295 -5 217 +6 51 -6 276 +7 139 -7 124 [Weighted category diff] +1 276 -1 100 +2 4 -2 20 +3 20 -3 4 +4 118 -4 160 +5 309 -5 225 +6 52 -6 298 +7 163 -7 135 show diff for ./en_user_log_phones_2011_08.csv +1 173 -1 28 +2 2 -2 17 +3 17 -3 2 +4 63 -4 82 +5 120 -5 51 +6 24 -6 220 +7 88 -7 87 Change-Id: I9d673acb0ff632828ae2e0ead56e76e3a20411c6
main
parent
dd5737b0fa
commit
8a809f3433
|
@@ -119,7 +119,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
|
||||||
return weighting->getSubstitutionCost()
|
return weighting->getSubstitutionCost()
|
||||||
+ weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
|
+ weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
|
||||||
case CT_NEW_WORD_SPACE_OMISSION:
|
case CT_NEW_WORD_SPACE_OMISSION:
|
||||||
return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG);
|
return weighting->getSpaceOmissionCost(traverseSession, dicNode, inputStateG);
|
||||||
case CT_MATCH:
|
case CT_MATCH:
|
||||||
return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
|
return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
|
||||||
case CT_COMPLETION:
|
case CT_COMPLETION:
|
||||||
|
|
|
@@ -57,7 +57,7 @@ class Weighting {
|
||||||
const DicTraverseSession *const traverseSession,
|
const DicTraverseSession *const traverseSession,
|
||||||
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
|
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
|
||||||
|
|
||||||
virtual float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
|
virtual float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
|
||||||
const DicNode *const dicNode, DicNode_InputStateG *const inputStateG) const = 0;
|
const DicNode *const dicNode, DicNode_InputStateG *const inputStateG) const = 0;
|
||||||
|
|
||||||
virtual float getNewWordBigramLanguageCost(
|
virtual float getNewWordBigramLanguageCost(
|
||||||
|
|
|
@@ -160,8 +160,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
|
||||||
// TODO: Remove. Do not prune node here.
|
// TODO: Remove. Do not prune node here.
|
||||||
const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
|
const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
|
||||||
// Process for handling space substitution (e.g., hevis => he is)
|
// Process for handling space substitution (e.g., hevis => he is)
|
||||||
if (allowsErrorCorrections
|
if (TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
|
||||||
&& TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
|
|
||||||
createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
|
createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -48,17 +48,17 @@ const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
|
||||||
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
|
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
|
||||||
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
|
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
|
||||||
const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
|
const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
|
||||||
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.334f;
|
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.33f;
|
||||||
|
const float ScoringParams::SPACE_OMISSION_COST = 0.1f;
|
||||||
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.37972f;
|
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.37972f;
|
||||||
const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
|
const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
|
||||||
const float ScoringParams::COST_NEW_WORD = 0.0314f;
|
|
||||||
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
|
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
|
||||||
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
|
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
|
||||||
const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
|
const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
|
||||||
const float ScoringParams::COST_COMPLETION = 0.00624f;
|
const float ScoringParams::COST_COMPLETION = 0.00624f;
|
||||||
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
|
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
|
||||||
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
|
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
|
||||||
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
|
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.3482f;
|
||||||
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
|
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
|
||||||
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
|
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
|
||||||
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
|
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
|
||||||
|
|
|
@@ -56,9 +56,9 @@ class ScoringParams {
|
||||||
static const float INSERTION_COST_FIRST_CHAR;
|
static const float INSERTION_COST_FIRST_CHAR;
|
||||||
static const float TRANSPOSITION_COST;
|
static const float TRANSPOSITION_COST;
|
||||||
static const float SPACE_SUBSTITUTION_COST;
|
static const float SPACE_SUBSTITUTION_COST;
|
||||||
|
static const float SPACE_OMISSION_COST;
|
||||||
static const float ADDITIONAL_PROXIMITY_COST;
|
static const float ADDITIONAL_PROXIMITY_COST;
|
||||||
static const float SUBSTITUTION_COST;
|
static const float SUBSTITUTION_COST;
|
||||||
static const float COST_NEW_WORD;
|
|
||||||
static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
|
static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
|
||||||
static const float DISTANCE_WEIGHT_LANGUAGE;
|
static const float DISTANCE_WEIGHT_LANGUAGE;
|
||||||
static const float COST_FIRST_COMPLETION;
|
static const float COST_FIRST_COMPLETION;
|
||||||
|
|
|
@@ -150,9 +150,10 @@ class TypingWeighting : public Weighting {
|
||||||
return cost + weightedDistance;
|
return cost + weightedDistance;
|
||||||
}
|
}
|
||||||
|
|
||||||
float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
|
float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
|
||||||
const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
|
const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
|
||||||
return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier();
|
const float cost = ScoringParams::SPACE_OMISSION_COST;
|
||||||
|
return cost * traverseSession->getMultiWordCostMultiplier();
|
||||||
}
|
}
|
||||||
|
|
||||||
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
|
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
|
||||||
|
@@ -202,7 +203,10 @@ class TypingWeighting : public Weighting {
|
||||||
|
|
||||||
AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
|
AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
|
||||||
const DicNode *const dicNode) const {
|
const DicNode *const dicNode) const {
|
||||||
const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD;
|
const int inputIndex = dicNode->getInputIndex(0);
|
||||||
|
const float distanceToSpaceKey = traverseSession->getProximityInfoState(0)
|
||||||
|
->getPointToKeyLength(inputIndex, KEYCODE_SPACE);
|
||||||
|
const float cost = ScoringParams::SPACE_SUBSTITUTION_COST * distanceToSpaceKey;
|
||||||
return cost * traverseSession->getMultiWordCostMultiplier();
|
return cost * traverseSession->getMultiWordCostMultiplier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue