Merge "Use WordAttributes for checking flags."
This commit is contained in:
commit
ae41d35971
8 changed files with 22 additions and 41 deletions
|
@ -136,14 +136,14 @@ class DicNode {
|
|||
}
|
||||
|
||||
void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
|
||||
const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
|
||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||
const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount,
|
||||
const int *const mergedNodeCodePoints) {
|
||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||
mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0],
|
||||
unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth,
|
||||
unigramProbability, wordId, newDepth, newLeavingDepth,
|
||||
dicNode->mDicNodeProperties.getPrevWordIds());
|
||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
|
@ -178,9 +178,6 @@ class DicNode {
|
|||
// Check if the current word and the previous word can be considered as a valid multiple word
|
||||
// suggestion.
|
||||
bool isValidMultipleWordSuggestion() const {
|
||||
if (isBlacklistedOrNotAWord()) {
|
||||
return false;
|
||||
}
|
||||
// Treat suggestion as invalid if the current and the previous word are single character
|
||||
// words.
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
|
||||
|
@ -404,10 +401,6 @@ class DicNode {
|
|||
return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
|
||||
}
|
||||
|
||||
bool isBlacklistedOrNotAWord() const {
|
||||
return mDicNodeProperties.isBlacklistedOrNotAWord();
|
||||
}
|
||||
|
||||
inline uint16_t getNodeCodePointCount() const {
|
||||
return mDicNodeProperties.getDepth();
|
||||
}
|
||||
|
|
|
@ -74,6 +74,10 @@ namespace latinime {
|
|||
}
|
||||
const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
|
||||
dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
|
||||
if (dicNode->hasMultipleWords()
|
||||
&& (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) {
|
||||
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||
}
|
||||
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
|
||||
const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
|
||||
/ static_cast<float>(MAX_PROBABILITY);
|
||||
|
|
|
@ -59,12 +59,12 @@ class DicNodeVector {
|
|||
}
|
||||
|
||||
void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
|
||||
const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
|
||||
const int unigramProbability, const int wordId,
|
||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||
ASSERT(!mLock);
|
||||
mDicNodes.emplace_back();
|
||||
mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability,
|
||||
wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
wordId, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
}
|
||||
|
||||
DicNode *operator[](const int id) {
|
||||
|
|
|
@ -30,20 +30,19 @@ class DicNodeProperties {
|
|||
public:
|
||||
AK_FORCE_INLINE DicNodeProperties()
|
||||
: mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY),
|
||||
mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID),
|
||||
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
|
||||
mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0),
|
||||
mLeavingDepth(0) {}
|
||||
|
||||
~DicNodeProperties() {}
|
||||
|
||||
// Should be called only once per DicNode is initialized.
|
||||
void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability,
|
||||
const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth,
|
||||
const uint16_t leavingDepth, const int *const prevWordIds) {
|
||||
const int wordId, const uint16_t depth, const uint16_t leavingDepth,
|
||||
const int *const prevWordIds) {
|
||||
mChildrenPtNodeArrayPos = childrenPos;
|
||||
mDicNodeCodePoint = nodeCodePoint;
|
||||
mUnigramProbability = unigramProbability;
|
||||
mWordId = wordId;
|
||||
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
|
||||
mDepth = depth;
|
||||
mLeavingDepth = leavingDepth;
|
||||
memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
|
||||
|
@ -55,7 +54,6 @@ class DicNodeProperties {
|
|||
mDicNodeCodePoint = NOT_A_CODE_POINT;
|
||||
mUnigramProbability = NOT_A_PROBABILITY;
|
||||
mWordId = NOT_A_WORD_ID;
|
||||
mIsBlacklistedOrNotAWord = false;
|
||||
mDepth = 0;
|
||||
mLeavingDepth = 0;
|
||||
memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
|
||||
|
@ -66,7 +64,6 @@ class DicNodeProperties {
|
|||
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
|
||||
mUnigramProbability = dicNodeProp->mUnigramProbability;
|
||||
mWordId = dicNodeProp->mWordId;
|
||||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth;
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
|
||||
|
@ -78,7 +75,6 @@ class DicNodeProperties {
|
|||
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||
mUnigramProbability = dicNodeProp->mUnigramProbability;
|
||||
mWordId = dicNodeProp->mWordId;
|
||||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
|
||||
|
@ -113,10 +109,6 @@ class DicNodeProperties {
|
|||
return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth;
|
||||
}
|
||||
|
||||
bool isBlacklistedOrNotAWord() const {
|
||||
return mIsBlacklistedOrNotAWord;
|
||||
}
|
||||
|
||||
const int *getPrevWordIds() const {
|
||||
return mPrevWordIds;
|
||||
}
|
||||
|
@ -134,8 +126,6 @@ class DicNodeProperties {
|
|||
int mUnigramProbability;
|
||||
int mDicNodeCodePoint;
|
||||
int mWordId;
|
||||
// TODO: Remove
|
||||
bool mIsBlacklistedOrNotAWord;
|
||||
uint16_t mDepth;
|
||||
uint16_t mLeavingDepth;
|
||||
int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
|
|
|
@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
|||
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
|
||||
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
|
||||
+ doubleLetterCost;
|
||||
const bool isPossiblyOffensiveWord =
|
||||
traverseSession->getDictionaryStructurePolicy()->getProbability(
|
||||
terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0;
|
||||
const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
|
||||
->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
|
||||
terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
|
||||
const bool isExactMatch =
|
||||
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
|
||||
const bool isExactMatchWithIntentionalOmission =
|
||||
|
@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
|||
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
|
||||
// (e.g. "AMD" and "and")
|
||||
const bool isSafeExactMatch = isExactMatch
|
||||
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
|
||||
&& !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase);
|
||||
const int outputTypeFlags =
|
||||
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
|
||||
(wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
|
||||
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
|
||||
| (isExactMatchWithIntentionalOmission ?
|
||||
Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
|
||||
|
||||
// Entries that are blacklisted or do not represent a word should not be output.
|
||||
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
|
||||
const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
|
||||
// When we have to block offensive words, non-exact matched offensive words should not be
|
||||
// output.
|
||||
const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
|
||||
const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord
|
||||
const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive()
|
||||
&& !isSafeExactMatch;
|
||||
|
||||
// Increase output score of top typing suggestion to ensure autocorrection.
|
||||
|
|
|
@ -79,10 +79,8 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
|||
}
|
||||
const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
|
||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
||||
ptNodeParams.getProbability(), wordId,
|
||||
ptNodeParams.isBlacklisted()
|
||||
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
|
||||
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
||||
ptNodeParams.getProbability(), wordId, ptNodeParams.getCodePointCount(),
|
||||
ptNodeParams.getCodePoints());
|
||||
}
|
||||
if (readingHelper.isError()) {
|
||||
mIsCorrupted = true;
|
||||
|
|
|
@ -408,8 +408,6 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
|||
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
|
||||
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
|
||||
childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId,
|
||||
PatriciaTrieReadingUtils::isBlacklisted(flags)
|
||||
|| PatriciaTrieReadingUtils::isNotAWord(flags),
|
||||
mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
}
|
||||
return siblingPos;
|
||||
|
|
|
@ -70,8 +70,6 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
|||
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
|
||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
||||
ptNodeParams.getProbability(), wordId,
|
||||
ptNodeParams.isBlacklisted()
|
||||
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
|
||||
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
||||
}
|
||||
if (readingHelper.isError()) {
|
||||
|
|
Loading…
Reference in a new issue