Merge "Use WordAttributes for checking flags."

This commit is contained in:
Keisuke Kuroyanagi 2014-09-10 12:13:29 +00:00 committed by Android (Google) Code Review
commit ae41d35971
8 changed files with 22 additions and 41 deletions

View file

@ -136,14 +136,14 @@ class DicNode {
}
void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0],
unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth,
unigramProbability, wordId, newDepth, newLeavingDepth,
dicNode->mDicNodeProperties.getPrevWordIds());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
@ -178,9 +178,6 @@ class DicNode {
// Check if the current word and the previous word can be considered as a valid multiple word
// suggestion.
bool isValidMultipleWordSuggestion() const {
if (isBlacklistedOrNotAWord()) {
return false;
}
// Treat suggestion as invalid if the current and the previous word are single character
// words.
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
@ -404,10 +401,6 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
}
// Forwards to mDicNodeProperties.isBlacklistedOrNotAWord() and returns its result.
// NOTE(review): the enclosing hunk header (-404,10 +401,6) shrinks by four lines, so this
// accessor appears to be removed by this change — confirm against the original diff markers.
bool isBlacklistedOrNotAWord() const {
return mDicNodeProperties.isBlacklistedOrNotAWord();
}
// Returns the node's code point count, which is the depth reported by
// mDicNodeProperties.getDepth().
inline uint16_t getNodeCodePointCount() const {
return mDicNodeProperties.getDepth();
}

View file

@ -74,6 +74,10 @@ namespace latinime {
}
const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
if (dicNode->hasMultipleWords()
&& (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
/ static_cast<float>(MAX_PROBABILITY);

View file

@ -59,12 +59,12 @@ class DicNodeVector {
}
// Appends a new element at the back of mDicNodes and initializes it with initAsChild(),
// passing dicNode together with the children position, unigram probability, word id and the
// merged code points. Asserts beforehand that mLock is not set.
// NOTE(review): this listing appears to show both the pre-change and post-change forms of the
// parameter list and of the initAsChild() argument list (with and without
// isBlacklistedOrNotAWord) — confirm against the original diff markers.
void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
const int unigramProbability, const int wordId,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.emplace_back();
mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability,
wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
wordId, mergedNodeCodePointCount, mergedNodeCodePoints);
}
DicNode *operator[](const int id) {

View file

@ -30,20 +30,19 @@ class DicNodeProperties {
public:
// Default constructor: sets the children position, unigram probability, code point and word id
// to their NOT_A_* sentinel constants and both depth fields to 0.
// NOTE(review): the tail of the initializer list appears twice below (with and without
// mIsBlacklistedOrNotAWord), presumably the pre-change and post-change forms — confirm against
// the original diff markers.
AK_FORCE_INLINE DicNodeProperties()
: mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY),
mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0),
mLeavingDepth(0) {}
~DicNodeProperties() {}
// Should be called only once per DicNode is initialized.
void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability,
const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth,
const uint16_t leavingDepth, const int *const prevWordIds) {
const int wordId, const uint16_t depth, const uint16_t leavingDepth,
const int *const prevWordIds) {
mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint;
mUnigramProbability = unigramProbability;
mWordId = wordId;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@ -55,7 +54,6 @@ class DicNodeProperties {
mDicNodeCodePoint = NOT_A_CODE_POINT;
mUnigramProbability = NOT_A_PROBABILITY;
mWordId = NOT_A_WORD_ID;
mIsBlacklistedOrNotAWord = false;
mDepth = 0;
mLeavingDepth = 0;
memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@ -66,7 +64,6 @@ class DicNodeProperties {
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
mUnigramProbability = dicNodeProp->mUnigramProbability;
mWordId = dicNodeProp->mWordId;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth;
memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@ -78,7 +75,6 @@ class DicNodeProperties {
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mUnigramProbability = dicNodeProp->mUnigramProbability;
mWordId = dicNodeProp->mWordId;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth;
memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@ -113,10 +109,6 @@ class DicNodeProperties {
return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth;
}
// Returns the stored mIsBlacklistedOrNotAWord flag.
// NOTE(review): the enclosing hunk header (-113,10 +109,6) shrinks by four lines, so this
// accessor appears to be removed by this change — confirm against the original diff markers.
bool isBlacklistedOrNotAWord() const {
return mIsBlacklistedOrNotAWord;
}
// Returns a read-only pointer to the first element of the mPrevWordIds member array.
const int *getPrevWordIds() const {
return mPrevWordIds;
}
@ -134,8 +126,6 @@ class DicNodeProperties {
int mUnigramProbability;
int mDicNodeCodePoint;
int mWordId;
// TODO: Remove
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];

View file

@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost;
const bool isPossiblyOffensiveWord =
traverseSession->getDictionaryStructurePolicy()->getProbability(
terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0;
const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
const bool isExactMatch =
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
const bool isExactMatchWithIntentionalOmission =
@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
// (e.g. "AMD" and "and")
const bool isSafeExactMatch = isExactMatch
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
&& !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase);
const int outputTypeFlags =
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
(wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
| (isExactMatchWithIntentionalOmission ?
Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
// Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
// When we have to block offensive words, non-exact matched offensive words should not be
// output.
const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord
const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive()
&& !isSafeExactMatch;
// Increase output score of top typing suggestion to ensure autocorrection.

View file

@ -79,10 +79,8 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
}
const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
ptNodeParams.getProbability(), wordId,
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
ptNodeParams.getProbability(), wordId, ptNodeParams.getCodePointCount(),
ptNodeParams.getCodePoints());
}
if (readingHelper.isError()) {
mIsCorrupted = true;

View file

@ -408,8 +408,6 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId,
PatriciaTrieReadingUtils::isBlacklisted(flags)
|| PatriciaTrieReadingUtils::isNotAWord(flags),
mergedNodeCodePointCount, mergedNodeCodePoints);
}
return siblingPos;

View file

@ -70,8 +70,6 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
ptNodeParams.getProbability(), wordId,
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
}
if (readingHelper.isError()) {