Use WordAttributes for checking flags.

Bug: 14425059
Change-Id: Idee84478a482a0e7b5cc53e5dbd4e2484584ba79
This commit is contained in:
Keisuke Kuroyanagi 2014-09-10 15:28:57 +09:00
parent 2111e3abc9
commit 87a5c76906
8 changed files with 22 additions and 41 deletions

View file

@ -136,14 +136,14 @@ class DicNode {
} }
void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { const int *const mergedNodeCodePoints) {
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>( const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0], mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0],
unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth, unigramProbability, wordId, newDepth, newLeavingDepth,
dicNode->mDicNodeProperties.getPrevWordIds()); dicNode->mDicNodeProperties.getPrevWordIds());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints); mergedNodeCodePoints);
@ -178,9 +178,6 @@ class DicNode {
// Check if the current word and the previous word can be considered as a valid multiple word // Check if the current word and the previous word can be considered as a valid multiple word
// suggestion. // suggestion.
bool isValidMultipleWordSuggestion() const { bool isValidMultipleWordSuggestion() const {
if (isBlacklistedOrNotAWord()) {
return false;
}
// Treat suggestion as invalid if the current and the previous word are single character // Treat suggestion as invalid if the current and the previous word are single character
// words. // words.
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
@ -404,10 +401,6 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes(); return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
} }
bool isBlacklistedOrNotAWord() const {
return mDicNodeProperties.isBlacklistedOrNotAWord();
}
inline uint16_t getNodeCodePointCount() const { inline uint16_t getNodeCodePointCount() const {
return mDicNodeProperties.getDepth(); return mDicNodeProperties.getDepth();
} }

View file

@ -74,6 +74,10 @@ namespace latinime {
} }
const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
if (dicNode->hasMultipleWords()
&& (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this. // TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
/ static_cast<float>(MAX_PROBABILITY); / static_cast<float>(MAX_PROBABILITY);

View file

@ -59,12 +59,12 @@ class DicNodeVector {
} }
void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const int unigramProbability, const int wordId,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock); ASSERT(!mLock);
mDicNodes.emplace_back(); mDicNodes.emplace_back();
mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability, mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability,
wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); wordId, mergedNodeCodePointCount, mergedNodeCodePoints);
} }
DicNode *operator[](const int id) { DicNode *operator[](const int id) {

View file

@ -30,20 +30,19 @@ class DicNodeProperties {
public: public:
AK_FORCE_INLINE DicNodeProperties() AK_FORCE_INLINE DicNodeProperties()
: mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY), : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY),
mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} mLeavingDepth(0) {}
~DicNodeProperties() {} ~DicNodeProperties() {}
// Should be called only once per DicNode initialization. // Should be called only once per DicNode initialization.
void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability, void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability,
const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth, const int wordId, const uint16_t depth, const uint16_t leavingDepth,
const uint16_t leavingDepth, const int *const prevWordIds) { const int *const prevWordIds) {
mChildrenPtNodeArrayPos = childrenPos; mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint; mDicNodeCodePoint = nodeCodePoint;
mUnigramProbability = unigramProbability; mUnigramProbability = unigramProbability;
mWordId = wordId; mWordId = wordId;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth; mDepth = depth;
mLeavingDepth = leavingDepth; mLeavingDepth = leavingDepth;
memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@ -55,7 +54,6 @@ class DicNodeProperties {
mDicNodeCodePoint = NOT_A_CODE_POINT; mDicNodeCodePoint = NOT_A_CODE_POINT;
mUnigramProbability = NOT_A_PROBABILITY; mUnigramProbability = NOT_A_PROBABILITY;
mWordId = NOT_A_WORD_ID; mWordId = NOT_A_WORD_ID;
mIsBlacklistedOrNotAWord = false;
mDepth = 0; mDepth = 0;
mLeavingDepth = 0; mLeavingDepth = 0;
memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@ -66,7 +64,6 @@ class DicNodeProperties {
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
mUnigramProbability = dicNodeProp->mUnigramProbability; mUnigramProbability = dicNodeProp->mUnigramProbability;
mWordId = dicNodeProp->mWordId; mWordId = dicNodeProp->mWordId;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth; mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth; mLeavingDepth = dicNodeProp->mLeavingDepth;
memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@ -78,7 +75,6 @@ class DicNodeProperties {
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mUnigramProbability = dicNodeProp->mUnigramProbability; mUnigramProbability = dicNodeProp->mUnigramProbability;
mWordId = dicNodeProp->mWordId; mWordId = dicNodeProp->mWordId;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth; mLeavingDepth = dicNodeProp->mLeavingDepth;
memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@ -113,10 +109,6 @@ class DicNodeProperties {
return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth; return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth;
} }
bool isBlacklistedOrNotAWord() const {
return mIsBlacklistedOrNotAWord;
}
const int *getPrevWordIds() const { const int *getPrevWordIds() const {
return mPrevWordIds; return mPrevWordIds;
} }
@ -134,8 +126,6 @@ class DicNodeProperties {
int mUnigramProbability; int mUnigramProbability;
int mDicNodeCodePoint; int mDicNodeCodePoint;
int mWordId; int mWordId;
// TODO: Remove
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth; uint16_t mDepth;
uint16_t mLeavingDepth; uint16_t mLeavingDepth;
int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];

View file

@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode); scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost; + doubleLetterCost;
const bool isPossiblyOffensiveWord = const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
traverseSession->getDictionaryStructurePolicy()->getProbability( ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0; terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
const bool isExactMatch = const bool isExactMatch =
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
const bool isExactMatchWithIntentionalOmission = const bool isExactMatchWithIntentionalOmission =
@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match. // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
// (e.g. "AMD" and "and") // (e.g. "AMD" and "and")
const bool isSafeExactMatch = isExactMatch const bool isSafeExactMatch = isExactMatch
&& !(isPossiblyOffensiveWord && isFirstCharUppercase); && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase);
const int outputTypeFlags = const int outputTypeFlags =
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
| (isExactMatchWithIntentionalOmission ? | (isExactMatchWithIntentionalOmission ?
Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
// Entries that are blacklisted or do not represent a word should not be output. // Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
// When we have to block offensive words, non-exact matched offensive words should not be // When we have to block offensive words, non-exact matched offensive words should not be
// output. // output.
const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive()
&& !isSafeExactMatch; && !isSafeExactMatch;
// Increase output score of top typing suggestion to ensure autocorrection. // Increase output score of top typing suggestion to ensure autocorrection.

View file

@ -79,10 +79,8 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
} }
const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID; const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
ptNodeParams.getProbability(), wordId, ptNodeParams.getProbability(), wordId, ptNodeParams.getCodePointCount(),
ptNodeParams.isBlacklisted() ptNodeParams.getCodePoints());
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
} }
if (readingHelper.isError()) { if (readingHelper.isError()) {
mIsCorrupted = true; mIsCorrupted = true;

View file

@ -408,8 +408,6 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID; const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId, childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId,
PatriciaTrieReadingUtils::isBlacklisted(flags)
|| PatriciaTrieReadingUtils::isNotAWord(flags),
mergedNodeCodePointCount, mergedNodeCodePoints); mergedNodeCodePointCount, mergedNodeCodePoints);
} }
return siblingPos; return siblingPos;

View file

@ -70,8 +70,6 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID; const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
ptNodeParams.getProbability(), wordId, ptNodeParams.getProbability(), wordId,
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
} }
if (readingHelper.isError()) { if (readingHelper.isError()) {