Merge "Get BigramsPos and ShortcutPos via structure policy."
commit
a54b8b3f5d
|
@ -112,10 +112,10 @@ class DicNode {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = false;
|
mIsCachedForNextSuggestion = false;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
|
NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||||
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||||
false /* isTerminal */, true /* hasChildren */,
|
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||||
false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
|
0 /* terminalDepth */);
|
||||||
mDicNodeState.init(prevWordNodePos);
|
mDicNodeState.init(prevWordNodePos);
|
||||||
PROF_NODE_RESET(mProfiler);
|
PROF_NODE_RESET(mProfiler);
|
||||||
}
|
}
|
||||||
|
@ -125,10 +125,10 @@ class DicNode {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
|
NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||||
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||||
false /* isTerminal */, true /* hasChildren */,
|
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||||
false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
|
0 /* terminalDepth */);
|
||||||
// TODO: Move to dicNodeState?
|
// TODO: Move to dicNodeState?
|
||||||
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
|
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
|
||||||
mDicNodeState.mDicNodeStateInput.init(
|
mDicNodeState.mDicNodeStateInput.init(
|
||||||
|
@ -157,18 +157,16 @@ class DicNode {
|
||||||
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initAsChild(DicNode *dicNode, const int pos, const int childrenPos,
|
void initAsChild(DicNode *dicNode, const int pos, const int childrenPos, const int probability,
|
||||||
const int attributesPos, const int probability, const bool isTerminal,
|
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||||
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
|
||||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||||
mDicNodeProperties.init(pos, childrenPos, attributesPos, mergedNodeCodePoints[0],
|
mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
|
||||||
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
|
isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
|
||||||
newLeavingDepth);
|
|
||||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||||
mergedNodeCodePoints);
|
mergedNodeCodePoints);
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
|
@ -467,10 +465,6 @@ class DicNode {
|
||||||
return mDicNodeProperties.isBlacklistedOrNotAWord();
|
return mDicNodeProperties.isBlacklistedOrNotAWord();
|
||||||
}
|
}
|
||||||
|
|
||||||
int getAttributesPos() const {
|
|
||||||
return mDicNodeProperties.getAttributesPos();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uint16_t getNodeCodePointCount() const {
|
inline uint16_t getNodeCodePointCount() const {
|
||||||
return mDicNodeProperties.getDepth();
|
return mDicNodeProperties.getDepth();
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,20 +31,17 @@ namespace latinime {
|
||||||
class DicNodeProperties {
|
class DicNodeProperties {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE DicNodeProperties()
|
AK_FORCE_INLINE DicNodeProperties()
|
||||||
: mPos(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
|
: mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
|
||||||
mNodeCodePoint(0), mIsTerminal(false), mHasChildren(false),
|
mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
|
||||||
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
|
|
||||||
|
|
||||||
virtual ~DicNodeProperties() {}
|
virtual ~DicNodeProperties() {}
|
||||||
|
|
||||||
// Should be called only once per DicNode is initialized.
|
// Should be called only once per DicNode is initialized.
|
||||||
void init(const int pos, const int childrenPos, const int attributesPos,
|
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
|
||||||
const int nodeCodePoint, const int probability, const bool isTerminal,
|
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||||
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
|
||||||
const uint16_t depth, const uint16_t leavingDepth) {
|
const uint16_t depth, const uint16_t leavingDepth) {
|
||||||
mPos = pos;
|
mPos = pos;
|
||||||
mChildrenPos = childrenPos;
|
mChildrenPos = childrenPos;
|
||||||
mAttributesPos = attributesPos;
|
|
||||||
mNodeCodePoint = nodeCodePoint;
|
mNodeCodePoint = nodeCodePoint;
|
||||||
mProbability = probability;
|
mProbability = probability;
|
||||||
mIsTerminal = isTerminal;
|
mIsTerminal = isTerminal;
|
||||||
|
@ -58,7 +55,6 @@ class DicNodeProperties {
|
||||||
void init(const DicNodeProperties *const nodeProp) {
|
void init(const DicNodeProperties *const nodeProp) {
|
||||||
mPos = nodeProp->mPos;
|
mPos = nodeProp->mPos;
|
||||||
mChildrenPos = nodeProp->mChildrenPos;
|
mChildrenPos = nodeProp->mChildrenPos;
|
||||||
mAttributesPos = nodeProp->mAttributesPos;
|
|
||||||
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
||||||
mProbability = nodeProp->mProbability;
|
mProbability = nodeProp->mProbability;
|
||||||
mIsTerminal = nodeProp->mIsTerminal;
|
mIsTerminal = nodeProp->mIsTerminal;
|
||||||
|
@ -72,7 +68,6 @@ class DicNodeProperties {
|
||||||
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
|
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
|
||||||
mPos = nodeProp->mPos;
|
mPos = nodeProp->mPos;
|
||||||
mChildrenPos = nodeProp->mChildrenPos;
|
mChildrenPos = nodeProp->mChildrenPos;
|
||||||
mAttributesPos = nodeProp->mAttributesPos;
|
|
||||||
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||||
mProbability = nodeProp->mProbability;
|
mProbability = nodeProp->mProbability;
|
||||||
mIsTerminal = nodeProp->mIsTerminal;
|
mIsTerminal = nodeProp->mIsTerminal;
|
||||||
|
@ -90,10 +85,6 @@ class DicNodeProperties {
|
||||||
return mChildrenPos;
|
return mChildrenPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getAttributesPos() const {
|
|
||||||
return mAttributesPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getProbability() const {
|
int getProbability() const {
|
||||||
return mProbability;
|
return mProbability;
|
||||||
}
|
}
|
||||||
|
@ -129,7 +120,6 @@ class DicNodeProperties {
|
||||||
// for this class
|
// for this class
|
||||||
int mPos;
|
int mPos;
|
||||||
int mChildrenPos;
|
int mChildrenPos;
|
||||||
int mAttributesPos;
|
|
||||||
int mProbability;
|
int mProbability;
|
||||||
int mNodeCodePoint;
|
int mNodeCodePoint;
|
||||||
bool mIsTerminal;
|
bool mIsTerminal;
|
||||||
|
|
|
@ -77,7 +77,6 @@ namespace latinime {
|
||||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
||||||
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
||||||
const bool hasShortcuts = (0 != (BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS & flags));
|
|
||||||
const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
|
const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
|
||||||
|
|
||||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
||||||
|
@ -104,17 +103,14 @@ namespace latinime {
|
||||||
pos = BinaryFormat::skipProbability(flags, pos);
|
pos = BinaryFormat::skipProbability(flags, pos);
|
||||||
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
|
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
|
||||||
binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
|
binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
|
||||||
const int attributesPos =
|
|
||||||
hasShortcuts ? BinaryFormat::skipChildrenPosition(flags, pos) : NOT_A_DICT_POS;
|
|
||||||
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
||||||
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
||||||
|
|
||||||
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
}
|
}
|
||||||
childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, attributesPos,
|
childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
|
||||||
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord,
|
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||||
mergedNodeCodePointCount, mergedNodeCodePoints);
|
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -63,13 +63,13 @@ class DicNodeVector {
|
||||||
}
|
}
|
||||||
|
|
||||||
void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos,
|
void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos,
|
||||||
const int attributesPos, const int probability, const bool isTerminal,
|
const int probability, const bool isTerminal, const bool hasChildren,
|
||||||
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
||||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
const int *const mergedNodeCodePoints) {
|
||||||
ASSERT(!mLock);
|
ASSERT(!mLock);
|
||||||
mDicNodes.push_back(mEmptyNode);
|
mDicNodes.push_back(mEmptyNode);
|
||||||
mDicNodes.back().initAsChild(dicNode, pos, childrenPos, attributesPos, probability,
|
mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
|
||||||
isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
|
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
|
||||||
mergedNodeCodePoints);
|
mergedNodeCodePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -109,13 +109,13 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
|
||||||
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||||
if (0 == pos) {
|
if (NOT_A_DICT_POS == pos) {
|
||||||
// If no bigrams for this exact word, search again in lower case.
|
// If no bigrams for this exact word, search again in lower case.
|
||||||
pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
||||||
true /* forceLowerCaseSearch */);
|
true /* forceLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
// If still no bigrams, we really don't have them!
|
// If still no bigrams, we really don't have them!
|
||||||
if (0 == pos) return 0;
|
if (NOT_A_DICT_POS == pos) return 0;
|
||||||
|
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
int unigramProbability = 0;
|
int unigramProbability = 0;
|
||||||
|
@ -154,8 +154,8 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
||||||
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
|
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
|
||||||
if (NOT_A_VALID_WORD_POS == pos) return 0;
|
if (NOT_A_VALID_WORD_POS == pos) return 0;
|
||||||
return BinaryFormat::getBigramListPositionForWordPosition(
|
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(
|
||||||
mBinaryDictionaryInfo->getDictRoot(), pos);
|
mBinaryDictionaryInfo, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
|
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
|
||||||
|
@ -178,7 +178,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
|
||||||
int length1) const {
|
int length1) const {
|
||||||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||||
if (0 == pos) return false;
|
if (NOT_A_DICT_POS == pos) return false;
|
||||||
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
|
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
|
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
|
||||||
|
|
|
@ -28,7 +28,7 @@ class BinaryDictionaryBigramsIterator {
|
||||||
BinaryDictionaryBigramsIterator(
|
BinaryDictionaryBigramsIterator(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
|
||||||
: mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
|
: mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
|
||||||
mBigramPos(0), mHasNext(true) {}
|
mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool hasNext() const {
|
AK_FORCE_INLINE bool hasNext() const {
|
||||||
return mHasNext;
|
return mHasNext;
|
||||||
|
|
|
@ -73,8 +73,11 @@ class BinaryFormat {
|
||||||
const int length, const bool forceLowerCaseSearch);
|
const int length, const bool forceLowerCaseSearch);
|
||||||
static int getCodePointsAndProbabilityAndReturnCodePointCount(
|
static int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const uint8_t *const root, const int nodePos, const int maxCodePointCount,
|
const uint8_t *const root, const int nodePos, const int maxCodePointCount,
|
||||||
int *outCodePoints, int *outUnigramProbability);
|
int *const outCodePoints, int *const outUnigramProbability);
|
||||||
static int getBigramListPositionForWordPosition(const uint8_t *const root, int position);
|
static int getBigramListPositionForWordPosition(const uint8_t *const root,
|
||||||
|
const int nodePosition);
|
||||||
|
static int getShortcutListPositionForWordPosition(const uint8_t *const root,
|
||||||
|
const int nodePosition);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
|
||||||
|
@ -344,8 +347,8 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
* Return value : the length of the word, of 0 if the word was not found.
|
* Return value : the length of the word, of 0 if the word was not found.
|
||||||
*/
|
*/
|
||||||
AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(
|
AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const uint8_t *const root, const int nodePos,
|
const uint8_t *const root, const int nodePos, const int maxCodePointCount,
|
||||||
const int maxCodePointCount, int *outCodePoints, int *outUnigramProbability) {
|
int *const outCodePoints, int *const outUnigramProbability) {
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
|
|
||||||
|
@ -473,10 +476,11 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointC
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
|
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
|
||||||
const uint8_t *const root, int position) {
|
const uint8_t *const root, const int nodePosition) {
|
||||||
if (NOT_A_VALID_WORD_POS == position) return 0;
|
if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
|
||||||
|
int position = nodePosition;
|
||||||
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
||||||
if (!(flags & FLAG_HAS_BIGRAMS)) return 0;
|
if (!(flags & FLAG_HAS_BIGRAMS)) return NOT_A_DICT_POS;
|
||||||
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
||||||
position = skipOtherCharacters(root, position);
|
position = skipOtherCharacters(root, position);
|
||||||
} else {
|
} else {
|
||||||
|
@ -488,5 +492,21 @@ AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
|
||||||
return position;
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int BinaryFormat::getShortcutListPositionForWordPosition(
|
||||||
|
const uint8_t *const root, const int nodePosition) {
|
||||||
|
if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
|
||||||
|
int position = nodePosition;
|
||||||
|
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
||||||
|
if (!(flags & FLAG_HAS_SHORTCUT_TARGETS)) return NOT_A_DICT_POS;
|
||||||
|
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
||||||
|
position = skipOtherCharacters(root, position);
|
||||||
|
} else {
|
||||||
|
getCodePointAndForwardPointer(root, &position);
|
||||||
|
}
|
||||||
|
position = skipProbability(flags, position);
|
||||||
|
position = skipChildrenPosition(flags, position);
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_BINARY_FORMAT_H
|
#endif // LATINIME_BINARY_FORMAT_H
|
||||||
|
|
|
@ -22,8 +22,8 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
|
||||||
#include "suggest/core/dictionary/bloom_filter.h"
|
#include "suggest/core/dictionary/bloom_filter.h"
|
||||||
|
#include "suggest/core/dictionary/probability_utils.h"
|
||||||
#include "utils/hash_map_compat.h"
|
#include "utils/hash_map_compat.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -67,11 +67,8 @@ class MultiBigramMap {
|
||||||
~BigramMap() {}
|
~BigramMap() {}
|
||||||
|
|
||||||
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
|
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
|
||||||
const int bigramsListPos = BinaryFormat::getBigramListPositionForWordPosition(
|
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
||||||
binaryDictionaryInfo->getDictRoot(), nodePos);
|
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
|
||||||
if (0 == bigramsListPos) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
for (BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
||||||
bigramsIt.hasNext(); /* no-op */) {
|
bigramsIt.hasNext(); /* no-op */) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
|
@ -110,11 +107,8 @@ class MultiBigramMap {
|
||||||
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
|
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
|
||||||
const int nextWordPosition, const int unigramProbability) {
|
const int nextWordPosition, const int unigramProbability) {
|
||||||
const int bigramsListPos = BinaryFormat::getBigramListPositionForWordPosition(
|
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
||||||
binaryDictionaryInfo->getDictRoot(), nodePos);
|
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
|
||||||
if (0 == bigramsListPos) {
|
|
||||||
return ProbabilityUtils::backoff(unigramProbability);
|
|
||||||
}
|
|
||||||
for (BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
for (BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
||||||
bigramsIt.hasNext(); /* no-op */) {
|
bigramsIt.hasNext(); /* no-op */) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
|
|
|
@ -62,6 +62,12 @@ class DictionaryStructurePolicy {
|
||||||
virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const int nodePos) const = 0;
|
const int nodePos) const = 0;
|
||||||
|
|
||||||
|
virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const = 0;
|
||||||
|
|
||||||
|
virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryStructurePolicy() {}
|
DictionaryStructurePolicy() {}
|
||||||
virtual ~DictionaryStructurePolicy() {}
|
virtual ~DictionaryStructurePolicy() {}
|
||||||
|
|
|
@ -210,14 +210,16 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!terminalDicNode->hasMultipleWords()) {
|
if (!terminalDicNode->hasMultipleWords()) {
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo =
|
||||||
|
traverseSession->getBinaryDictionaryInfo();
|
||||||
const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
|
const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
|
||||||
terminalDicNode->getAttributesPos());
|
binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
|
||||||
|
binaryDictionaryInfo, terminalDicNode->getPos()));
|
||||||
// Shortcut is not supported for multiple words suggestions.
|
// Shortcut is not supported for multiple words suggestions.
|
||||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||||
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
||||||
outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
|
outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
|
||||||
finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
|
finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
|
||||||
|
|
||||||
}
|
}
|
||||||
DicNode::managedDelete(terminalDicNode);
|
DicNode::managedDelete(terminalDicNode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,4 +69,18 @@ int PatriciaTriePolicy::getUnigramProbability(
|
||||||
return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
|
return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int PatriciaTriePolicy::getShortcutPositionOfNode(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const {
|
||||||
|
return BinaryFormat::getShortcutListPositionForWordPosition(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), nodePos);
|
||||||
|
}
|
||||||
|
|
||||||
|
int PatriciaTriePolicy::getBigramsPositionOfNode(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const {
|
||||||
|
return BinaryFormat::getBigramListPositionForWordPosition(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), nodePos);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -48,6 +48,12 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
|
||||||
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const int nodePos) const;
|
const int nodePos) const;
|
||||||
|
|
||||||
|
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const;
|
||||||
|
|
||||||
|
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
|
DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
|
||||||
static const PatriciaTriePolicy sInstance;
|
static const PatriciaTriePolicy sInstance;
|
||||||
|
|
Loading…
Reference in New Issue