Merge "Replace NOT_VALID_WORD with NOT_A_VALID_WORD_POS"
commit
544e891cf1
|
@ -292,7 +292,7 @@ static inline void prof_out(void) {
|
||||||
// of the binary dictionary where a {key,value} string pair scheme is used.
|
// of the binary dictionary where a {key,value} string pair scheme is used.
|
||||||
#define LARGEST_INT_DIGIT_COUNT 11
|
#define LARGEST_INT_DIGIT_COUNT 11
|
||||||
|
|
||||||
#define NOT_VALID_WORD (-99)
|
#define NOT_A_VALID_WORD_POS (-99)
|
||||||
#define NOT_A_CODE_POINT (-1)
|
#define NOT_A_CODE_POINT (-1)
|
||||||
#define NOT_A_DISTANCE (-1)
|
#define NOT_A_DISTANCE (-1)
|
||||||
#define NOT_A_COORDINATE (-1)
|
#define NOT_A_COORDINATE (-1)
|
||||||
|
|
|
@ -112,7 +112,7 @@ class DicNode {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = false;
|
mIsCachedForNextSuggestion = false;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
|
NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
|
||||||
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
||||||
false /* isTerminal */, true /* hasChildren */,
|
false /* isTerminal */, true /* hasChildren */,
|
||||||
false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
|
false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
|
||||||
|
@ -125,7 +125,7 @@ class DicNode {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
|
NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
|
||||||
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
||||||
false /* isTerminal */, true /* hasChildren */,
|
false /* isTerminal */, true /* hasChildren */,
|
||||||
false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
|
false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
|
||||||
|
@ -231,7 +231,7 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isFirstWord() const {
|
bool isFirstWord() const {
|
||||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_VALID_WORD;
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_VALID_WORD_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isCompletion(const int inputSize) const {
|
bool isCompletion(const int inputSize) const {
|
||||||
|
|
|
@ -29,7 +29,7 @@ class DicNodeStatePrevWord {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE DicNodeStatePrevWord()
|
AK_FORCE_INLINE DicNodeStatePrevWord()
|
||||||
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
||||||
mPrevWordNodePos(0) {
|
mPrevWordNodePos(NOT_A_VALID_WORD_POS) {
|
||||||
memset(mPrevWord, 0, sizeof(mPrevWord));
|
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||||
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
|
||||||
mPrevWordCount = 0;
|
mPrevWordCount = 0;
|
||||||
mPrevWordStart = 0;
|
mPrevWordStart = 0;
|
||||||
mPrevWordProbability = -1;
|
mPrevWordProbability = -1;
|
||||||
mPrevWordNodePos = NOT_VALID_WORD;
|
mPrevWordNodePos = NOT_A_VALID_WORD_POS;
|
||||||
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -179,8 +179,9 @@ namespace latinime {
|
||||||
const int unigramProbability = node->getProbability();
|
const int unigramProbability = node->getProbability();
|
||||||
const int wordPos = node->getPos();
|
const int wordPos = node->getPos();
|
||||||
const int prevWordPos = node->getPrevWordPos();
|
const int prevWordPos = node->getPrevWordPos();
|
||||||
if (NOT_VALID_WORD == wordPos || NOT_VALID_WORD == prevWordPos) {
|
if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) {
|
||||||
// Note: Normally wordPos comes from the dictionary and should never equal NOT_VALID_WORD.
|
// Note: Normally wordPos comes from the dictionary and should never equal
|
||||||
|
// NOT_A_VALID_WORD_POS.
|
||||||
return ProbabilityUtils::backoff(unigramProbability);
|
return ProbabilityUtils::backoff(unigramProbability);
|
||||||
}
|
}
|
||||||
if (multiBigramMap) {
|
if (multiBigramMap) {
|
||||||
|
|
|
@ -153,7 +153,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
||||||
if (0 >= prevWordLength) return 0;
|
if (0 >= prevWordLength) return 0;
|
||||||
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
|
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
|
||||||
if (NOT_VALID_WORD == pos) return 0;
|
if (NOT_A_VALID_WORD_POS == pos) return 0;
|
||||||
return BinaryFormat::getBigramListPositionForWordPosition(
|
return BinaryFormat::getBigramListPositionForWordPosition(
|
||||||
mBinaryDictionaryInfo->getDictRoot(), pos);
|
mBinaryDictionaryInfo->getDictRoot(), pos);
|
||||||
}
|
}
|
||||||
|
@ -181,7 +181,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
|
||||||
if (0 == pos) return false;
|
if (0 == pos) return false;
|
||||||
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
|
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
|
||||||
if (NOT_VALID_WORD == nextWordPos) return false;
|
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
|
||||||
|
|
||||||
for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
||||||
bigramsIt.hasNext(); /* no-op */) {
|
bigramsIt.hasNext(); /* no-op */) {
|
||||||
|
|
|
@ -255,7 +255,7 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
// dictionary. If no match is found, it returns NOT_A_VALID_WORD_POS.
|
||||||
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
const int *const inWord, const int length, const bool forceLowerCaseSearch) {
|
const int *const inWord, const int length, const bool forceLowerCaseSearch) {
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
|
@ -264,22 +264,22 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
while (true) {
|
while (true) {
|
||||||
// If we already traversed the tree further than the word is long, there means
|
// If we already traversed the tree further than the word is long, there means
|
||||||
// there was no match (or we would have found it).
|
// there was no match (or we would have found it).
|
||||||
if (wordPos >= length) return NOT_VALID_WORD;
|
if (wordPos >= length) return NOT_A_VALID_WORD_POS;
|
||||||
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
||||||
const int wChar = forceLowerCaseSearch
|
const int wChar = forceLowerCaseSearch
|
||||||
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
||||||
while (true) {
|
while (true) {
|
||||||
// If there are no more character groups in this node, it means we could not
|
// If there are no more character groups in this node, it means we could not
|
||||||
// find a matching character for this depth, therefore there is no match.
|
// find a matching character for this depth, therefore there is no match.
|
||||||
if (0 >= charGroupCount) return NOT_VALID_WORD;
|
if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS;
|
||||||
const int charGroupPos = pos;
|
const int charGroupPos = pos;
|
||||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
if (character == wChar) {
|
if (character == wChar) {
|
||||||
// This is the correct node. Only one character group may start with the same
|
// This is the correct node. Only one character group may start with the same
|
||||||
// char within a node, so either we found our match in this node, or there is
|
// char within a node, so either we found our match in this node, or there is
|
||||||
// no match and we can return NOT_VALID_WORD. So we will check all the characters
|
// no match and we can return NOT_A_VALID_WORD_POS. So we will check all the
|
||||||
// in this character group indeed does match.
|
// characters in this character group indeed does match.
|
||||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||||
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
while (NOT_A_CODE_POINT != character) {
|
while (NOT_A_CODE_POINT != character) {
|
||||||
|
@ -288,8 +288,8 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
// character that does not match, as explained above, it means the word is
|
// character that does not match, as explained above, it means the word is
|
||||||
// not in the dictionary (by virtue of this chargroup being the only one to
|
// not in the dictionary (by virtue of this chargroup being the only one to
|
||||||
// match the word on the first character, but not matching the whole word).
|
// match the word on the first character, but not matching the whole word).
|
||||||
if (wordPos >= length) return NOT_VALID_WORD;
|
if (wordPos >= length) return NOT_A_VALID_WORD_POS;
|
||||||
if (inWord[wordPos] != character) return NOT_VALID_WORD;
|
if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS;
|
||||||
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -305,7 +305,7 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
pos = BinaryFormat::skipProbability(FLAG_IS_TERMINAL, pos);
|
pos = BinaryFormat::skipProbability(FLAG_IS_TERMINAL, pos);
|
||||||
}
|
}
|
||||||
if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
|
if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
|
||||||
return NOT_VALID_WORD;
|
return NOT_A_VALID_WORD_POS;
|
||||||
}
|
}
|
||||||
// We have children and we are still shorter than the word we are searching for, so
|
// We have children and we are still shorter than the word we are searching for, so
|
||||||
// we need to traverse children. Put the pointer on the children position, and
|
// we need to traverse children. Put the pointer on the children position, and
|
||||||
|
@ -474,7 +474,7 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointC
|
||||||
|
|
||||||
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
|
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
|
||||||
const uint8_t *const root, int position) {
|
const uint8_t *const root, int position) {
|
||||||
if (NOT_VALID_WORD == position) return 0;
|
if (NOT_A_VALID_WORD_POS == position) return 0;
|
||||||
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
||||||
if (!(flags & FLAG_HAS_BIGRAMS)) return 0;
|
if (!(flags & FLAG_HAS_BIGRAMS)) return 0;
|
||||||
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
||||||
|
|
|
@ -89,7 +89,7 @@ int Dictionary::getProbability(const int *word, int length) const {
|
||||||
mBinaryDictionaryInfo.getStructurePolicy();
|
mBinaryDictionaryInfo.getStructurePolicy();
|
||||||
int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
|
int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_VALID_WORD == pos) {
|
if (NOT_A_VALID_WORD_POS == pos) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
|
return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
|
||||||
|
|
|
@ -32,13 +32,13 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
|
||||||
mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier();
|
mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier();
|
||||||
mSuggestOptions = suggestOptions;
|
mSuggestOptions = suggestOptions;
|
||||||
if (!prevWord) {
|
if (!prevWord) {
|
||||||
mPrevWordPos = NOT_VALID_WORD;
|
mPrevWordPos = NOT_A_VALID_WORD_POS;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||||
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
||||||
if (mPrevWordPos == NOT_VALID_WORD) {
|
if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
|
||||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||||
// auto-capitalized words like "The [current_word]".
|
// auto-capitalized words like "The [current_word]".
|
||||||
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
|
|
|
@ -55,7 +55,7 @@ class DicTraverseSession {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr)
|
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr)
|
||||||
: mPrevWordPos(NOT_VALID_WORD), mProximityInfo(0),
|
: mPrevWordPos(NOT_A_VALID_WORD_POS), mProximityInfo(0),
|
||||||
mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(),
|
mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(),
|
||||||
mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
|
mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
|
||||||
mMultiWordCostMultiplier(1.0f) {
|
mMultiWordCostMultiplier(1.0f) {
|
||||||
|
|
Loading…
Reference in New Issue