Improve space substitution error correction.

Bug: 17432052

[Category diff]
+1     262
-1      93
+2       2
-2      18
+3      18
-3       2
+4     111
-4     148
+5     295
-5     217
+6      51
-6     276
+7     139
-7     124

[Weighted category diff]
+1     276
-1     100
+2       4
-2      20
+3      20
-3       4
+4     118
-4     160
+5     309
-5     225
+6      52
-6     298
+7     163
-7     135

[Diff for ./en_user_log_phones_2011_08.csv]
+1     173
-1      28
+2       2
-2      17
+3      17
-3       2
+4      63
-4      82
+5     120
-5      51
+6      24
-6     220
+7      88
-7      87

Change-Id: I9d673acb0ff632828ae2e0ead56e76e3a20411c6
This commit is contained in:
Keisuke Kuroyanagi 2014-10-28 17:11:14 +09:00
parent dd5737b0fa
commit 8a809f3433
6 changed files with 14 additions and 11 deletions

View file

@ -119,7 +119,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
return weighting->getSubstitutionCost()
+ weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
case CT_NEW_WORD_SPACE_OMISSION:
return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG);
return weighting->getSpaceOmissionCost(traverseSession, dicNode, inputStateG);
case CT_MATCH:
return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
case CT_COMPLETION:

View file

@ -57,7 +57,7 @@ class Weighting {
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
virtual float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
virtual float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *const inputStateG) const = 0;
virtual float getNewWordBigramLanguageCost(

View file

@ -160,8 +160,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
// TODO: Remove. Do not prune node here.
const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
// Process for handling space substitution (e.g., hevis => he is)
if (allowsErrorCorrections
&& TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
if (TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
}

View file

@ -48,17 +48,17 @@ const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.334f;
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.33f;
const float ScoringParams::SPACE_OMISSION_COST = 0.1f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.37972f;
const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
const float ScoringParams::COST_NEW_WORD = 0.0314f;
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
const float ScoringParams::COST_COMPLETION = 0.00624f;
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.3482f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;

View file

@ -56,9 +56,9 @@ class ScoringParams {
static const float INSERTION_COST_FIRST_CHAR;
static const float TRANSPOSITION_COST;
static const float SPACE_SUBSTITUTION_COST;
static const float SPACE_OMISSION_COST;
static const float ADDITIONAL_PROXIMITY_COST;
static const float SUBSTITUTION_COST;
static const float COST_NEW_WORD;
static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
static const float DISTANCE_WEIGHT_LANGUAGE;
static const float COST_FIRST_COMPLETION;

View file

@ -150,9 +150,10 @@ class TypingWeighting : public Weighting {
return cost + weightedDistance;
}
float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier();
const float cost = ScoringParams::SPACE_OMISSION_COST;
return cost * traverseSession->getMultiWordCostMultiplier();
}
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
@ -202,7 +203,10 @@ class TypingWeighting : public Weighting {
AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const {
const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD;
const int inputIndex = dicNode->getInputIndex(0);
const float distanceToSpaceKey = traverseSession->getProximityInfoState(0)
->getPointToKeyLength(inputIndex, KEYCODE_SPACE);
const float cost = ScoringParams::SPACE_SUBSTITUTION_COST * distanceToSpaceKey;
return cost * traverseSession->getMultiWordCostMultiplier();
}