Fix: huge bigram costs for blacklisted words.

Bug: 8844931
Change-Id: I523005c5ed9a3d401a67b0e4e1c3ff2e4574e6df

parent a54b8b3f5d
commit b179199830
@@ -213,14 +213,18 @@ class DicNode {
         return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount());
     }
 
-    bool isImpossibleBigramWord() const {
+    // Check if the current word and the previous word can be considered as a valid multiple word
+    // suggestion.
+    bool isValidMultipleWordSuggestion() const {
         if (isBlacklistedOrNotAWord()) {
-            return true;
+            return false;
         }
+        // Treat suggestion as invalid if the current and the previous word are single character
+        // words.
         const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
                 - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
         const int currentWordLen = getNodeCodePointCount();
-        return (prevWordLen == 1 && currentWordLen == 1);
+        return (prevWordLen != 1 || currentWordLen != 1);
     }
 
     bool isFirstCharUppercase() const {
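The DicNode hunk above inverts the old isImpossibleBigramWord() predicate into isValidMultipleWordSuggestion(). Below is a minimal standalone sketch of the new logic for illustration only; the real method reads these values from the DicNode state rather than taking them as parameters, so the signature here is an assumption.

// Simplified, standalone restatement of the new predicate (not the real
// DicNode class; the parameters stand in for state the method reads itself).
#include <iostream>

// A multiple-word suggestion is valid only if the node is neither blacklisted
// nor a non-word, and the two words are not both single characters.
static bool isValidMultipleWordSuggestion(const bool blacklistedOrNotAWord,
        const int prevWordLen, const int currentWordLen) {
    if (blacklistedOrNotAWord) {
        return false;
    }
    return (prevWordLen != 1 || currentWordLen != 1);
}

int main() {
    // Blacklisted or non-word node: never a valid multi-word suggestion.
    std::cout << isValidMultipleWordSuggestion(true, 4, 5) << std::endl;   // 0
    // Two single-character words: rejected.
    std::cout << isValidMultipleWordSuggestion(false, 1, 1) << std::endl;  // 0
    // Ordinary pair of words: accepted.
    std::cout << isValidMultipleWordSuggestion(false, 4, 5) << std::endl;  // 1
    return 0;
}

Phrasing the check positively leaves the decision about when to apply the penalty to the caller, instead of baking an "impossible bigram" verdict into DicNode itself.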
@@ -159,7 +159,7 @@ namespace latinime {
 /* static */ float DicNodeUtils::getBigramNodeImprobability(
         const BinaryDictionaryInfo *const binaryDictionaryInfo,
         const DicNode *const node, MultiBigramMap *multiBigramMap) {
-    if (node->isImpossibleBigramWord()) {
+    if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
         return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
     }
     const int probability = getBigramNodeProbability(binaryDictionaryInfo, node, multiBigramMap);
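At the call site, the huge MAX_VALUE_FOR_WEIGHTING cost is now applied only to candidates that actually span multiple words and fail the validity check, so a blacklisted single word no longer gets the penalty. A minimal sketch of that effect follows; the constant value, the stub node, and the fixed fallback cost are placeholders, and the real getBigramNodeProbability() lookup is not reproduced.

// Sketch of the caller-side effect (placeholder constant and stubbed node state).
#include <iostream>

static const int MAX_VALUE_FOR_WEIGHTING = 10000000;  // placeholder, not the real constant

// Stand-in for the node state that getBigramNodeImprobability consults.
struct NodeStub {
    bool multipleWords;
    bool validMultipleWordSuggestion;
    float normalBigramImprobability;  // what the regular bigram lookup would yield
};

static float getBigramNodeImprobability(const NodeStub &node) {
    // Only multi-word candidates that fail the validity check get the huge cost.
    if (node.multipleWords && !node.validMultipleWordSuggestion) {
        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
    }
    return node.normalBigramImprobability;
}

int main() {
    // A single-word candidate (even a blacklisted one) keeps its normal cost.
    const NodeStub singleWord = {false /* multipleWords */, false, 1.5f};
    // A multi-word candidate that fails the validity check gets the huge cost.
    const NodeStub invalidMultiWord = {true /* multipleWords */, false, 1.5f};
    std::cout << getBigramNodeImprobability(singleWord) << std::endl;       // 1.5
    std::cout << getBigramNodeImprobability(invalidMultiWord) << std::endl; // 1e+07
    return 0;
}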
@@ -55,10 +55,10 @@ class TypingScoring : public Scoring {
             const int inputSize, const bool forceCommit) const {
         const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
                 + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
-        return static_cast<int>((ScoringParams::TYPING_BASE_OUTPUT_SCORE
-                - (compoundDistance / maxDistance)
-                + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f))
-                * SUGGEST_INTERFACE_OUTPUT_SCALE);
+        const float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE
+                - compoundDistance / maxDistance
+                + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f);
+        return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);
     }
 
     AK_FORCE_INLINE float getDoubleLetterDemotionDistanceCost(const int terminalIndex,
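The TypingScoring hunk is a behavior-preserving cleanup: the calibrated score is pulled into a named local before it is scaled and truncated. The small sketch below checks that the old and new forms agree; the ScoringParams constants and SUGGEST_INTERFACE_OUTPUT_SCALE values here are placeholders, not the real ones.

// Sketch verifying the refactor is arithmetic-neutral (placeholder constants).
#include <cassert>
#include <iostream>

static const float TYPING_BASE_OUTPUT_SCORE = 1.0f;              // placeholder
static const float AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;          // placeholder
static const float SUGGEST_INTERFACE_OUTPUT_SCALE = 1000000.0f;  // placeholder

// Old form: one expression inside the cast.
static int calibratedScoreOld(const float compoundDistance, const float maxDistance,
        const bool forceCommit) {
    return static_cast<int>((TYPING_BASE_OUTPUT_SCORE
            - (compoundDistance / maxDistance)
            + (forceCommit ? AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f))
            * SUGGEST_INTERFACE_OUTPUT_SCALE);
}

// New form: the score is named before scaling.
static int calibratedScoreNew(const float compoundDistance, const float maxDistance,
        const bool forceCommit) {
    const float score = TYPING_BASE_OUTPUT_SCORE
            - compoundDistance / maxDistance
            + (forceCommit ? AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f);
    return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);
}

int main() {
    const float compoundDistance = 3.2f;
    const float maxDistance = 8.0f;
    for (const bool forceCommit : {false, true}) {
        assert(calibratedScoreOld(compoundDistance, maxDistance, forceCommit)
                == calibratedScoreNew(compoundDistance, maxDistance, forceCommit));
    }
    std::cout << "old and new forms agree" << std::endl;
    return 0;
}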