diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 32ff0ce18..7777de7d1 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -136,14 +136,14 @@ class DicNode { } void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, - const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, - const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { + const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount, + const int *const mergedNodeCodePoints) { uint16_t newDepth = static_cast(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0], - unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth, + unigramProbability, wordId, newDepth, newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordIds()); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); @@ -178,9 +178,6 @@ class DicNode { // Check if the current word and the previous word can be considered as a valid multiple word // suggestion. bool isValidMultipleWordSuggestion() const { - if (isBlacklistedOrNotAWord()) { - return false; - } // Treat suggestion as invalid if the current and the previous word are single character // words. const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() @@ -404,10 +401,6 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes(); } - bool isBlacklistedOrNotAWord() const { - return mDicNodeProperties.isBlacklistedOrNotAWord(); - } - inline uint16_t getNodeCodePointCount() const { return mDicNodeProperties.getDepth(); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 26c7e3357..fe5fe8448 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -74,6 +74,10 @@ namespace latinime { } const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); + if (dicNode->hasMultipleWords() + && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) { + return static_cast(MAX_VALUE_FOR_WEIGHTING); + } // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast(MAX_PROBABILITY - wordAttributes.getProbability()) / static_cast(MAX_PROBABILITY); diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index dfeb3fc1f..fa491dcf9 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -59,12 +59,12 @@ class DicNodeVector { } void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, - const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, + const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.emplace_back(); mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability, - wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); + wordId, mergedNodeCodePointCount, mergedNodeCodePoints); } DicNode *operator[](const int id) { diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 6a8377a1b..62f2e1452 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -30,20 +30,19 @@ class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY), - mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), - mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} + mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0), + mLeavingDepth(0) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability, - const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth, - const uint16_t leavingDepth, const int *const prevWordIds) { + const int wordId, const uint16_t depth, const uint16_t leavingDepth, + const int *const prevWordIds) { mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; mUnigramProbability = unigramProbability; mWordId = wordId; - mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); @@ -55,7 +54,6 @@ class DicNodeProperties { mDicNodeCodePoint = NOT_A_CODE_POINT; mUnigramProbability = NOT_A_PROBABILITY; mWordId = NOT_A_WORD_ID; - mIsBlacklistedOrNotAWord = false; mDepth = 0; mLeavingDepth = 0; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); @@ -66,7 +64,6 @@ class DicNodeProperties { mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; mUnigramProbability = dicNodeProp->mUnigramProbability; mWordId = dicNodeProp->mWordId; - mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); @@ -78,7 +75,6 @@ class DicNodeProperties { mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child mUnigramProbability = dicNodeProp->mUnigramProbability; mWordId = dicNodeProp->mWordId; - mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); @@ -113,10 +109,6 @@ class DicNodeProperties { return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth; } - bool isBlacklistedOrNotAWord() const { - return mIsBlacklistedOrNotAWord; - } - const int *getPrevWordIds() const { return mPrevWordIds; } @@ -134,8 +126,6 @@ class DicNodeProperties { int mUnigramProbability; int mDicNodeCodePoint; int mWordId; - // TODO: Remove - bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp index cecb4e216..6e0193772 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp @@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode); const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost; - const bool isPossiblyOffensiveWord = - traverseSession->getDictionaryStructurePolicy()->getProbability( - terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0; + const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy() + ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(), + terminalDicNode->getWordId(), nullptr /* multiBigramMap */); const bool isExactMatch = ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); const bool isExactMatchWithIntentionalOmission = @@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; // Heuristic: We exclude probability=0 first-char-uppercase words from exact match. // (e.g. "AMD" and "and") const bool isSafeExactMatch = isExactMatch - && !(isPossiblyOffensiveWord && isFirstCharUppercase); + && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase); const int outputTypeFlags = - (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) + (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) | (isExactMatchWithIntentionalOmission ? Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); // Entries that are blacklisted or do not represent a word should not be output. - const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); + const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()); // When we have to block offensive words, non-exact matched offensive words should not be // output. const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); - const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord + const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive() && !isSafeExactMatch; // Increase output score of top typing suggestion to ensure autocorrection. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 547cc997c..1871044a5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -79,10 +79,8 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d } const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), - ptNodeParams.getProbability(), wordId, - ptNodeParams.isBlacklisted() - || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, - ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + ptNodeParams.getProbability(), wordId, ptNodeParams.getCodePointCount(), + ptNodeParams.getCodePoints()); } if (readingHelper.isError()) { mIsCorrupted = true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 44148e817..3c22edb7b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -408,8 +408,6 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId, - PatriciaTrieReadingUtils::isBlacklisted(flags) - || PatriciaTrieReadingUtils::isNotAWord(flags), mergedNodeCodePointCount, mergedNodeCodePoints); } return siblingPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index e4462550e..be70c5c47 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -70,8 +70,6 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId, - ptNodeParams.isBlacklisted() - || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); } if (readingHelper.isError()) {