From 2ed09c214afb49ffd0ce348c9a9f22ec23ae9792 Mon Sep 17 00:00:00 2001 From: Xiaojun Bi Date: Thu, 3 Oct 2013 18:15:29 -0700 Subject: [PATCH] Optimize the parameters for the typing algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new set of parameters is optimized for both keystroke savings and regression test scores. This change corresponds to patch 6 of Change-Id: Ib8439a85de26cbbba58837344717f0acbfcf0ab0 in the regression test repository. Here is the link to the doc analyzing the “nnow → know” case: https://docs.google.com/a/google.com/document/d/1XIHfAHLlP1f3F8R-u7yxPGWcP58tokygzN_lQKXFFgM/edit Here is the link to the spreadsheet showing the diff of the words with frequencies >= 10 in en_user_log_phones_2011_08.csv: https://docs.google.com/a/google.com/spreadsheet/ccc?key=0Am_RQlV4zC6SdFFVTlhJT0RLUENCWWZOZ3JQTUpMQ2c&usp=drive_web#gid=0 Updated TRT Results for this change: [Category diff] +1 783 -1 717 +2 20 -2 34 +3 34 -3 20 +4 575 -4 777 +5 765 -5 755 +6 785 -6 666 +7 479 -7 472 [Weighted category diff] +1 999 -1 965 +2 21 -2 35 +3 35 -3 21 +4 804 -4 980 +5 970 -5 971 +6 954 -6 831 +7 645 -7 625 Bug: 10613502 Change-Id: I5cd447673a196ad5af641a2118602bec879bdefb --- .../policyimpl/typing/scoring_params.cpp | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index ecceb60d3..66637ac4b 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -27,30 +27,30 @@ const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170; const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310; const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4; -const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f; -const float ScoringParams::PROXIMITY_COST = 0.095f; -const 
float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.102f; -const float ScoringParams::FIRST_PROXIMITY_COST = 0.019f; -const float ScoringParams::OMISSION_COST = 0.458f; -const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.491f; -const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.582f; -const float ScoringParams::INSERTION_COST = 0.730f; -const float ScoringParams::TERMINAL_INSERTION_COST = 0.93f; -const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.586f; -const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.70f; -const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.623f; -const float ScoringParams::TRANSPOSITION_COST = 0.526f; -const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f; -const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f; -const float ScoringParams::SUBSTITUTION_COST = 0.383f; -const float ScoringParams::COST_NEW_WORD = 0.042f; -const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f; -const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f; -const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f; -const float ScoringParams::COST_LOOKAHEAD = 0.073f; -const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.093f; -const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.041f; -const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.447f; +const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f; +const float ScoringParams::PROXIMITY_COST = 0.0694f; +const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f; +const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f; +const float ScoringParams::OMISSION_COST = 0.4676f; +const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.399f; +const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f; +const float ScoringParams::INSERTION_COST = 0.7248f; +const float ScoringParams::TERMINAL_INSERTION_COST = 0.9828f; +const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f; +const float 
ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f; +const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f; +const float ScoringParams::TRANSPOSITION_COST = 0.5608f; +const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.339f; +const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.4576f; +const float ScoringParams::SUBSTITUTION_COST = 0.3806f; +const float ScoringParams::COST_NEW_WORD = 0.0292f; +const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f; +const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f; +const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.4786f; +const float ScoringParams::COST_LOOKAHEAD = 0.00624f; +const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.06836f; +const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f; +const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f; const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f; const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f; const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f;