// Patch summary (leading text before the first "diff --git" header; not part of any hunk).
//
// Files touched, all under native/jni/src/suggest/:
//   * core/dicnode/dic_node.h
//       DicNode::isCapitalized() is renamed to isFirstCharUppercase(). The body is unchanged:
//       it passes getOutputWordBuf()[0] to isAsciiUpper().
//   * policyimpl/typing/scoring_params.cpp, policyimpl/typing/scoring_params.h
//       ScoringParams::COST_NEW_WORD_CAPITALIZED (0.174f) is removed and
//       ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE (0.25f) is added in its place.
//   * policyimpl/typing/typing_weighting.h
//       - getNewWordCost() no longer calls isCapitalized(); it always returns
//         COST_NEW_WORD * getMultiWordCostMultiplier().
//       - getSpaceSubstitutionCost() no longer calls isCapitalized(); it always returns
//         (SPACE_SUBSTITUTION_COST + COST_NEW_WORD) * getMultiWordCostMultiplier().
//       - In the TypingWeighting method whose tail appears in the "-80,8 +80,18" hunk (its name is
//         not visible in this patch), `cost` becomes non-const and the new constant is added to it
//         when dicNode->getDepth() == 2, dicNode->hasMultipleWords() and
//         dicNode->isFirstCharUppercase() are all true.
//
// NOTE(review): the removed COST_NEW_WORD_CAPITALIZED was multiplied by
// getMultiWordCostMultiplier() in both call sites; the new constant is added to `cost` directly,
// with no multiplier applied in the visible hunk. Confirm this is intended.
// NOTE(review): presumably isAsciiUpper() only recognises ASCII capitals, so non-ASCII uppercase
// first letters would not be demoted -- verify against its definition.
// NOTE(review): in this copy of the patch many diff lines share one physical line; the hunk
// headers (-219,7 +219,7, -39,7 +39,7, -48,7 +48,7, -80,8 +80,18, -129,10 +139,7, -174,9 +181,7)
// still state the intended line counts.
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 92783dec7..4225bb3e5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -219,7 +219,7 @@ class DicNode { return (prevWordLen == 1 && currentWordLen == 1); } - bool isCapitalized() const { + bool isFirstCharUppercase() const { const int c = getOutputWordBuf()[0]; return isAsciiUpper(c); } diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index 299ca83ab..81dd52370 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -39,7 +39,7 @@ const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f; const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f; const float ScoringParams::SUBSTITUTION_COST = 0.403f; const float ScoringParams::COST_NEW_WORD = 0.042f; -const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f; +const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f; const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f; const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f; const float ScoringParams::COST_LOOKAHEAD = 0.073f; diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h index 8f104b362..035497bf4 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -48,7 +48,7 @@ class ScoringParams { static const float ADDITIONAL_PROXIMITY_COST; static const float SUBSTITUTION_COST; static const float COST_NEW_WORD; - static const float COST_NEW_WORD_CAPITALIZED; + static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE; static const float DISTANCE_WEIGHT_LANGUAGE; static const float COST_FIRST_LOOKAHEAD; static 
const float COST_LOOKAHEAD; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index e6fa1bdc4..3938c0ec5 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -80,8 +80,18 @@ class TypingWeighting : public Weighting { const bool isFirstChar = pointIndex == 0; const bool isProximity = isProximityDicNode(traverseSession, dicNode); - const float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST + float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST : ScoringParams::PROXIMITY_COST) : 0.0f; + if (dicNode->getDepth() == 2) { + // At the second character of the current word, we check if the first char is uppercase + // and the word is a second or later word of a multiple word suggestion. We demote it + // if so. + const bool isSecondOrLaterWordFirstCharUppercase = + dicNode->hasMultipleWords() && dicNode->isFirstCharUppercase(); + if (isSecondOrLaterWordFirstCharUppercase) { + cost += ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE; + } + } return weightedDistance + cost; } @@ -129,10 +139,7 @@ class TypingWeighting : public Weighting { float getNewWordCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { - const bool isCapitalized = dicNode->isCapitalized(); - const float cost = isCapitalized ? 
- ScoringParams::COST_NEW_WORD_CAPITALIZED : ScoringParams::COST_NEW_WORD; - return cost * traverseSession->getMultiWordCostMultiplier(); + return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier(); } float getNewWordBigramCost(const DicTraverseSession *const traverseSession, @@ -174,9 +181,7 @@ class TypingWeighting : public Weighting { AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { - const bool isCapitalized = dicNode->isCapitalized(); - const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + (isCapitalized ? - ScoringParams::COST_NEW_WORD_CAPITALIZED : ScoringParams::COST_NEW_WORD); + const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD; return cost * traverseSession->getMultiWordCostMultiplier(); }