Merge DicNodeStatePrevWord into DicNodeStateOutput.
Before:
(0) 2232.70 (0.86%)
(1) 255258.50 (98.89%)
(2) 585.73 (0.23%)
(66) 0.26 (0.00%)
Total 258126.46 (sum of others 258077.18)

After:
(0) 2249.23 (0.93%)
(1) 239883.63 (98.83%)
(2) 554.82 (0.23%)
(66) 0.35 (0.00%)
Total 242734.38 (sum of others 242688.04)

Change-Id: I9760cae5b98b3d1f4804b6b60317887eaa3ff71c
This commit is contained in:
parent
adfb262797
commit
eddbb7ac88
7 changed files with 180 additions and 222 deletions
|
@ -20,29 +20,34 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/dicnode/dic_node_profiler.h"
|
||||
#include "suggest/core/dicnode/dic_node_release_listener.h"
|
||||
#include "suggest/core/dicnode/dic_node_utils.h"
|
||||
#include "suggest/core/dicnode/internal/dic_node_state.h"
|
||||
#include "suggest/core/dicnode/internal/dic_node_properties.h"
|
||||
#include "suggest/core/dictionary/digraph_utils.h"
|
||||
#include "suggest/core/dictionary/error_type_utils.h"
|
||||
#include "suggest/core/layout/proximity_info_state.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
||||
#if DEBUG_DICT
|
||||
#define LOGI_SHOW_ADD_COST_PROP \
|
||||
do { char charBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
|
||||
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
|
||||
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
|
||||
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
|
||||
do { \
|
||||
char charBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
|
||||
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
|
||||
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
|
||||
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \
|
||||
} while (0)
|
||||
#define DUMP_WORD_AND_SCORE(header) \
|
||||
do { char charBuf[50]; char prevWordCharBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
|
||||
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), \
|
||||
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
|
||||
NELEMS(prevWordCharBuf)); \
|
||||
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \
|
||||
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
|
||||
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
|
||||
getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
|
||||
do { \
|
||||
char charBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), \
|
||||
getNodeCodePointCount() \
|
||||
+ mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \
|
||||
charBuf, NELEMS(charBuf)); \
|
||||
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \
|
||||
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
|
||||
getNormalizedCompoundDistance(), getRawLength(), charBuf, \
|
||||
getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOGI_SHOW_ADD_COST_PROP
|
||||
|
@ -103,8 +108,8 @@ class DicNode {
|
|||
void initByCopy(const DicNode *const dicNode) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
|
||||
mDicNodeState.init(&dicNode->mDicNodeState);
|
||||
mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties);
|
||||
mDicNodeState.initByCopy(&dicNode->mDicNodeState);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
|
@ -112,12 +117,8 @@ class DicNode {
|
|||
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = false;
|
||||
mDicNodeProperties.init(
|
||||
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||
0 /* terminalDepth */);
|
||||
mDicNodeState.init(prevWordPtNodePos);
|
||||
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos);
|
||||
mDicNodeState.init();
|
||||
PROF_NODE_RESET(mProfiler);
|
||||
}
|
||||
|
||||
|
@ -125,13 +126,8 @@ class DicNode {
|
|||
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
mDicNodeProperties.init(
|
||||
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||
0 /* terminalDepth */);
|
||||
mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos());
|
||||
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
|
||||
dicNode->mDicNodeProperties.getPtNodePos(),
|
||||
dicNode->mDicNodeProperties.getDepth());
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
@ -141,7 +137,7 @@ class DicNode {
|
|||
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
|
||||
const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
|
||||
mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
|
||||
mDicNodeState.init(&parentDicNode->mDicNodeState);
|
||||
mDicNodeState.initByCopy(&parentDicNode->mDicNodeState);
|
||||
PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
|
@ -156,7 +152,7 @@ class DicNode {
|
|||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
|
||||
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
|
||||
newLeavingDepth);
|
||||
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos());
|
||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
|
@ -200,7 +196,7 @@ class DicNode {
|
|||
|
||||
// Used to expand the node in DicNodeUtils
|
||||
int getNodeTypedCodePoint() const {
|
||||
return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount());
|
||||
return mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(getNodeCodePointCount());
|
||||
}
|
||||
|
||||
// Check if the current word and the previous word can be considered as a valid multiple word
|
||||
|
@ -211,19 +207,19 @@ class DicNode {
|
|||
}
|
||||
// Treat suggestion as invalid if the current and the previous word are single character
|
||||
// words.
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
|
||||
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
|
||||
- mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
|
||||
const int currentWordLen = getNodeCodePointCount();
|
||||
return (prevWordLen != 1 || currentWordLen != 1);
|
||||
}
|
||||
|
||||
bool isFirstCharUppercase() const {
|
||||
const int c = mDicNodeState.mDicNodeStateOutput.getCodePointAt(0);
|
||||
const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0);
|
||||
return CharUtils::isAsciiUpper(c);
|
||||
}
|
||||
|
||||
bool isFirstWord() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
|
||||
return mDicNodeProperties.getPrevWordTerminalPtNodePos() == NOT_A_DICT_POS;
|
||||
}
|
||||
|
||||
bool isCompletion(const int inputSize) const {
|
||||
|
@ -241,7 +237,7 @@ class DicNode {
|
|||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
int getPrevWordTerminalPtNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
|
||||
return mDicNodeProperties.getPrevWordTerminalPtNodePos();
|
||||
}
|
||||
|
||||
// Used in DicNodeUtils
|
||||
|
@ -263,8 +259,8 @@ class DicNode {
|
|||
|
||||
bool shouldBeFilteredBySafetyNetForBigram() const {
|
||||
const uint16_t currentDepth = getNodeCodePointCount();
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
|
||||
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
|
||||
- mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
|
||||
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
|
||||
}
|
||||
|
||||
|
@ -277,7 +273,7 @@ class DicNode {
|
|||
}
|
||||
|
||||
bool isTotalInputSizeExceedingLimit() const {
|
||||
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||
const int prevWordsLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
|
||||
const int currentWordDepth = getNodeCodePointCount();
|
||||
// TODO: 3 can be 2? Needs to be investigated.
|
||||
// TODO: Have a const variable for 3 (or 2)
|
||||
|
@ -285,25 +281,24 @@ class DicNode {
|
|||
}
|
||||
|
||||
void outputResult(int *dest) const {
|
||||
const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||
const uint16_t prevWordLength = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
|
||||
const uint16_t currentDepth = getNodeCodePointCount();
|
||||
DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(),
|
||||
prevWordLength, getOutputWordBuf(), currentDepth, dest);
|
||||
memmove(dest, getOutputWordBuf(), (prevWordLength + currentDepth) * sizeof(dest[0]));
|
||||
DUMP_WORD_AND_SCORE("OUTPUT");
|
||||
}
|
||||
|
||||
// "Total" in this context (and other methods in this class) means the whole suggestion. When
|
||||
// this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
|
||||
// the one that corresponds to the last word of the suggestion, and all the previous words
|
||||
// are concatenated together in mPrevWord - which contains a space at the end.
|
||||
// are concatenated together in mDicNodeStateOutput.
|
||||
int getTotalNodeSpaceCount() const {
|
||||
if (isFirstWord()) return 0;
|
||||
return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(),
|
||||
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength());
|
||||
return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(),
|
||||
mDicNodeState.mDicNodeStateOutput.getPrevWordsLength());
|
||||
}
|
||||
|
||||
int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
|
||||
const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
|
||||
const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex();
|
||||
if (inputIndex == NOT_AN_INDEX) {
|
||||
return NOT_AN_INDEX;
|
||||
} else {
|
||||
|
@ -312,7 +307,7 @@ class DicNode {
|
|||
}
|
||||
|
||||
bool hasMultipleWords() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
|
||||
return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0;
|
||||
}
|
||||
|
||||
int getProximityCorrectionCount() const {
|
||||
|
@ -346,7 +341,7 @@ class DicNode {
|
|||
|
||||
// Used to commit input partially
|
||||
int getPrevWordPtNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
|
||||
return mDicNodeProperties.getPrevWordTerminalPtNodePos();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
||||
|
@ -425,7 +420,7 @@ class DicNode {
|
|||
float getLanguageDistanceRatePerWordForScoring() const {
|
||||
const float langDist = getLanguageDistanceForScoring();
|
||||
const float totalWordCount =
|
||||
static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1);
|
||||
static_cast<float>(mDicNodeState.mDicNodeStateOutput.getPrevWordCount() + 1);
|
||||
return langDist / totalWordCount;
|
||||
}
|
||||
|
||||
|
@ -469,7 +464,7 @@ class DicNode {
|
|||
|
||||
// Returns code point count including spaces
|
||||
inline uint16_t getTotalNodeCodePointCount() const {
|
||||
return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||
return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void dump(const char *tag) const {
|
||||
|
@ -516,8 +511,9 @@ class DicNode {
|
|||
return depthDiff > 0;
|
||||
}
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
|
||||
const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
|
||||
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
|
||||
const int rightCodePoint =
|
||||
right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
|
||||
if (codePoint != rightCodePoint) {
|
||||
return rightCodePoint > codePoint;
|
||||
}
|
||||
|
@ -574,8 +570,8 @@ class DicNode {
|
|||
}
|
||||
|
||||
AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
|
||||
if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) {
|
||||
mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex(
|
||||
if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) {
|
||||
mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex(
|
||||
inputStateG->mInputIndex);
|
||||
}
|
||||
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
|
||||
|
|
|
@ -29,16 +29,18 @@ namespace latinime {
|
|||
class DicNodeProperties {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeProperties()
|
||||
: mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
|
||||
mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
|
||||
mDepth(0), mLeavingDepth(0) {}
|
||||
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
|
||||
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
|
||||
mIsTerminal(false), mHasChildrenPtNodes(false),
|
||||
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0),
|
||||
mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}
|
||||
|
||||
~DicNodeProperties() {}
|
||||
|
||||
// Should be called only once per DicNode initialization.
|
||||
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
|
||||
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||
const uint16_t depth, const uint16_t leavingDepth) {
|
||||
const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) {
|
||||
mPtNodePos = pos;
|
||||
mChildrenPtNodeArrayPos = childrenPos;
|
||||
mDicNodeCodePoint = nodeCodePoint;
|
||||
|
@ -48,10 +50,24 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
|
||||
mDepth = depth;
|
||||
mLeavingDepth = leavingDepth;
|
||||
mPrevWordTerminalPtNodePos = prevWordNodePos;
|
||||
}
|
||||
|
||||
// Init for copy
|
||||
void init(const DicNodeProperties *const dicNodeProp) {
|
||||
// Init for root with prevWordPtNodePos which is used for bigram
|
||||
void init(const int rootPtNodeArrayPos, const int prevWordNodePos) {
|
||||
mPtNodePos = NOT_A_DICT_POS;
|
||||
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
|
||||
mDicNodeCodePoint = NOT_A_CODE_POINT;
|
||||
mProbability = NOT_A_PROBABILITY;
|
||||
mIsTerminal = false;
|
||||
mHasChildrenPtNodes = true;
|
||||
mIsBlacklistedOrNotAWord = false;
|
||||
mDepth = 0;
|
||||
mLeavingDepth = 0;
|
||||
mPrevWordTerminalPtNodePos = prevWordNodePos;
|
||||
}
|
||||
|
||||
void initByCopy(const DicNodeProperties *const dicNodeProp) {
|
||||
mPtNodePos = dicNodeProp->mPtNodePos;
|
||||
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
|
||||
|
@ -61,6 +77,7 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth;
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
|
||||
}
|
||||
|
||||
// Init as passing child
|
||||
|
@ -74,6 +91,7 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
|
||||
}
|
||||
|
||||
int getPtNodePos() const {
|
||||
|
@ -113,6 +131,10 @@ class DicNodeProperties {
|
|||
return mIsBlacklistedOrNotAWord;
|
||||
}
|
||||
|
||||
int getPrevWordTerminalPtNodePos() const {
|
||||
return mPrevWordTerminalPtNodePos;
|
||||
}
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
|
@ -126,6 +148,7 @@ class DicNodeProperties {
|
|||
bool mIsBlacklistedOrNotAWord;
|
||||
uint16_t mDepth;
|
||||
uint16_t mLeavingDepth;
|
||||
int mPrevWordTerminalPtNodePos;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_PROPERTIES_H
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/dicnode/internal/dic_node_state_input.h"
|
||||
#include "suggest/core/dicnode/internal/dic_node_state_output.h"
|
||||
#include "suggest/core/dicnode/internal/dic_node_state_prevword.h"
|
||||
#include "suggest/core/dicnode/internal/dic_node_state_scoring.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -29,65 +28,50 @@ class DicNodeState {
|
|||
public:
|
||||
DicNodeStateInput mDicNodeStateInput;
|
||||
DicNodeStateOutput mDicNodeStateOutput;
|
||||
DicNodeStatePrevWord mDicNodeStatePrevWord;
|
||||
DicNodeStateScoring mDicNodeStateScoring;
|
||||
|
||||
AK_FORCE_INLINE DicNodeState()
|
||||
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
|
||||
mDicNodeStateScoring() {
|
||||
}
|
||||
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {}
|
||||
|
||||
~DicNodeState() {}
|
||||
|
||||
DicNodeState &operator=(const DicNodeState& src) {
|
||||
init(&src);
|
||||
initByCopy(&src);
|
||||
return *this;
|
||||
}
|
||||
|
||||
DicNodeState(const DicNodeState& src)
|
||||
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
|
||||
mDicNodeStateScoring() {
|
||||
init(&src);
|
||||
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {
|
||||
initByCopy(&src);
|
||||
}
|
||||
|
||||
// Init with prevWordPos
|
||||
void init(const int prevWordPos) {
|
||||
// Init for root
|
||||
void init() {
|
||||
mDicNodeStateInput.init();
|
||||
mDicNodeStateOutput.init();
|
||||
mDicNodeStatePrevWord.init(prevWordPos);
|
||||
mDicNodeStateScoring.init();
|
||||
}
|
||||
|
||||
// Init with previous word.
|
||||
void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState,
|
||||
const int prevWordPos, const int prevWordCodePointCount) {
|
||||
mDicNodeStateOutput.init(); // reset for next word
|
||||
const int prevWordCodePointCount) {
|
||||
mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput);
|
||||
mDicNodeStateInput.init(
|
||||
&prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */);
|
||||
mDicNodeStateScoring.init(&prevWordDicNodeState->mDicNodeStateScoring);
|
||||
mDicNodeStatePrevWord.init(
|
||||
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordCount() + 1,
|
||||
prevWordPos,
|
||||
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordBuf(),
|
||||
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordLength(),
|
||||
prevWordDicNodeState->mDicNodeStateOutput.getCodePointBuf(),
|
||||
prevWordCodePointCount,
|
||||
prevWordDicNodeState->mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
|
||||
prevWordDicNodeState->mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
|
||||
mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring);
|
||||
}
|
||||
|
||||
// Init by copy
|
||||
AK_FORCE_INLINE void init(const DicNodeState *const src) {
|
||||
mDicNodeStateInput.init(&src->mDicNodeStateInput);
|
||||
mDicNodeStateOutput.init(&src->mDicNodeStateOutput);
|
||||
mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord);
|
||||
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
|
||||
AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) {
|
||||
mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput);
|
||||
mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput);
|
||||
mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring);
|
||||
}
|
||||
|
||||
// Init by copy and adding merged node code points.
|
||||
void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
|
||||
const int *const mergedNodeCodePoints) {
|
||||
init(src);
|
||||
initByCopy(src);
|
||||
mDicNodeStateOutput.addMergedNodeCodePoints(
|
||||
mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ class DicNodeStateInput {
|
|||
mTerminalDiffCost[pointerId] = terminalDiffCost;
|
||||
}
|
||||
|
||||
void init(const DicNodeStateInput *const src) {
|
||||
void initByCopy(const DicNodeStateInput *const src) {
|
||||
init(src, false);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,24 +25,53 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
// Class to have information to be output. This can contain previous words when the suggestion
|
||||
// is a multi-word suggestion.
|
||||
class DicNodeStateOutput {
|
||||
public:
|
||||
DicNodeStateOutput() : mOutputtedCodePointCount(0) {}
|
||||
DicNodeStateOutput()
|
||||
: mOutputtedCodePointCount(0), mCurrentWordStart(0), mPrevWordCount(0),
|
||||
mPrevWordsLength(0), mPrevWordStart(0), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
|
||||
|
||||
~DicNodeStateOutput() {}
|
||||
|
||||
// Init for root
|
||||
void init() {
|
||||
mOutputtedCodePointCount = 0;
|
||||
mCodePointsBuf[0] = 0;
|
||||
mCurrentWordStart = 0;
|
||||
mOutputCodePoints[0] = 0;
|
||||
mPrevWordCount = 0;
|
||||
mPrevWordsLength = 0;
|
||||
mPrevWordStart = 0;
|
||||
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
||||
}
|
||||
|
||||
// Init for next word.
|
||||
void init(const DicNodeStateOutput *const stateOutput) {
|
||||
memmove(mCodePointsBuf, stateOutput->mCodePointsBuf,
|
||||
stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
|
||||
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount + 1;
|
||||
memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
|
||||
stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
|
||||
mOutputCodePoints[stateOutput->mOutputtedCodePointCount] = KEYCODE_SPACE;
|
||||
mCurrentWordStart = stateOutput->mOutputtedCodePointCount + 1;
|
||||
mPrevWordCount = std::min(static_cast<int16_t>(stateOutput->mPrevWordCount + 1),
|
||||
static_cast<int16_t>(MAX_RESULTS));
|
||||
mPrevWordsLength = stateOutput->mOutputtedCodePointCount + 1;
|
||||
mPrevWordStart = stateOutput->mCurrentWordStart;
|
||||
mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
|
||||
}
|
||||
|
||||
void initByCopy(const DicNodeStateOutput *const stateOutput) {
|
||||
memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
|
||||
stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
|
||||
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
|
||||
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
|
||||
mCodePointsBuf[mOutputtedCodePointCount] = 0;
|
||||
mOutputCodePoints[mOutputtedCodePointCount] = 0;
|
||||
}
|
||||
mCurrentWordStart = stateOutput->mCurrentWordStart;
|
||||
mPrevWordCount = stateOutput->mPrevWordCount;
|
||||
mPrevWordsLength = stateOutput->mPrevWordsLength;
|
||||
mPrevWordStart = stateOutput->mPrevWordStart;
|
||||
mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
|
||||
}
|
||||
|
||||
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
|
||||
|
@ -51,29 +80,72 @@ class DicNodeStateOutput {
|
|||
const int additionalCodePointCount = std::min(
|
||||
static_cast<int>(mergedNodeCodePointCount),
|
||||
MAX_WORD_LENGTH - mOutputtedCodePointCount);
|
||||
memmove(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
|
||||
additionalCodePointCount * sizeof(mCodePointsBuf[0]));
|
||||
memmove(&mOutputCodePoints[mOutputtedCodePointCount], mergedNodeCodePoints,
|
||||
additionalCodePointCount * sizeof(mOutputCodePoints[0]));
|
||||
mOutputtedCodePointCount = static_cast<uint16_t>(
|
||||
mOutputtedCodePointCount + mergedNodeCodePointCount);
|
||||
mOutputtedCodePointCount + additionalCodePointCount);
|
||||
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
|
||||
mCodePointsBuf[mOutputtedCodePointCount] = 0;
|
||||
mOutputCodePoints[mOutputtedCodePointCount] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int getCodePointAt(const int index) const {
|
||||
return mCodePointsBuf[index];
|
||||
int getCurrentWordCodePointAt(const int index) const {
|
||||
return mOutputCodePoints[mCurrentWordStart + index];
|
||||
}
|
||||
|
||||
const int *getCodePointBuf() const {
|
||||
return mCodePointsBuf;
|
||||
return mOutputCodePoints;
|
||||
}
|
||||
|
||||
void setSecondWordFirstInputIndex(const int inputIndex) {
|
||||
mSecondWordFirstInputIndex = inputIndex;
|
||||
}
|
||||
|
||||
int getSecondWordFirstInputIndex() const {
|
||||
return mSecondWordFirstInputIndex;
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
int16_t getPrevWordsLength() const {
|
||||
return mPrevWordsLength;
|
||||
}
|
||||
|
||||
int16_t getPrevWordCount() const {
|
||||
return mPrevWordCount;
|
||||
}
|
||||
|
||||
int16_t getPrevWordStart() const {
|
||||
return mPrevWordStart;
|
||||
}
|
||||
|
||||
int getOutputCodePointAt(const int id) const {
|
||||
return mOutputCodePoints[id];
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput);
|
||||
|
||||
// When the DicNode represents "this is a pen":
|
||||
// mOutputtedCodePointCount is 13, which is total code point count of "this is a pen" including
|
||||
// spaces.
|
||||
// mCurrentWordStart indicates the head of "pen", thus it is 10.
|
||||
// This contains 3 previous words, "this", "is" and "a"; thus, mPrevWordCount is 3.
|
||||
// mPrevWordsLength is length of "this is a ", which is 10.
|
||||
// mPrevWordStart is the start index of "a"; thus, it is 8.
|
||||
// mSecondWordFirstInputIndex is the first input index of "is".
|
||||
|
||||
uint16_t mOutputtedCodePointCount;
|
||||
int mCodePointsBuf[MAX_WORD_LENGTH];
|
||||
int mOutputCodePoints[MAX_WORD_LENGTH];
|
||||
int16_t mCurrentWordStart;
|
||||
// Previous word count in mOutputCodePoints.
|
||||
int16_t mPrevWordCount;
|
||||
// Total length of previous words in mOutputCodePoints. This is being used by the algorithm
|
||||
// that may want to look at the previous word information.
|
||||
int16_t mPrevWordsLength;
|
||||
// Start index of the previous word in mOutputCodePoints. This is being used for auto commit.
|
||||
int16_t mPrevWordStart;
|
||||
int mSecondWordFirstInputIndex;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
|
||||
|
|
|
@ -1,117 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H
|
||||
#define LATINIME_DIC_NODE_STATE_PREVWORD_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring> // for memset() and memmove()
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dicnode/dic_node_utils.h"
|
||||
#include "suggest/core/layout/proximity_info_state.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeStatePrevWord {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeStatePrevWord()
|
||||
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0),
|
||||
mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
|
||||
|
||||
~DicNodeStatePrevWord() {}
|
||||
|
||||
void init(const int prevWordNodePos) {
|
||||
mPrevWordLength = 0;
|
||||
mPrevWordCount = 0;
|
||||
mPrevWordStart = 0;
|
||||
mPrevWordPtNodePos = prevWordNodePos;
|
||||
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
||||
mPrevWord[0] = 0;
|
||||
}
|
||||
|
||||
// Init by copy
|
||||
AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) {
|
||||
mPrevWordLength = prevWord->mPrevWordLength;
|
||||
mPrevWordCount = prevWord->mPrevWordCount;
|
||||
mPrevWordStart = prevWord->mPrevWordStart;
|
||||
mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
|
||||
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
|
||||
memmove(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
|
||||
}
|
||||
|
||||
void init(const int16_t prevWordCount, const int prevWordNodePos, const int *const src0,
|
||||
const int16_t length0, const int *const src1, const int16_t length1,
|
||||
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
|
||||
mPrevWordCount = std::min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
|
||||
mPrevWordPtNodePos = prevWordNodePos;
|
||||
int twoWordsLen =
|
||||
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
|
||||
if (twoWordsLen >= MAX_WORD_LENGTH) {
|
||||
twoWordsLen = MAX_WORD_LENGTH - 1;
|
||||
}
|
||||
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
|
||||
mPrevWordStart = length0;
|
||||
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
|
||||
mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex;
|
||||
}
|
||||
|
||||
void setSecondWordFirstInputIndex(const int inputIndex) {
|
||||
mSecondWordFirstInputIndex = inputIndex;
|
||||
}
|
||||
|
||||
int getSecondWordFirstInputIndex() const {
|
||||
return mSecondWordFirstInputIndex;
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
int16_t getPrevWordLength() const {
|
||||
return mPrevWordLength;
|
||||
}
|
||||
|
||||
int16_t getPrevWordCount() const {
|
||||
return mPrevWordCount;
|
||||
}
|
||||
|
||||
int16_t getPrevWordStart() const {
|
||||
return mPrevWordStart;
|
||||
}
|
||||
|
||||
int getPrevWordPtNodePos() const {
|
||||
return mPrevWordPtNodePos;
|
||||
}
|
||||
|
||||
int getPrevWordCodePointAt(const int id) const {
|
||||
return mPrevWord[id];
|
||||
}
|
||||
|
||||
const int *getPrevWordBuf() const {
|
||||
return mPrevWord;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DicNodeStatePrevWord);
|
||||
|
||||
int16_t mPrevWordCount;
|
||||
int16_t mPrevWordLength;
|
||||
int16_t mPrevWordStart;
|
||||
int mPrevWordPtNodePos;
|
||||
int mSecondWordFirstInputIndex;
|
||||
int mPrevWord[MAX_WORD_LENGTH];
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H
|
|
@ -53,7 +53,7 @@ class DicNodeStateScoring {
|
|||
mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
|
||||
AK_FORCE_INLINE void initByCopy(const DicNodeStateScoring *const scoring) {
|
||||
mEditCorrectionCount = scoring->mEditCorrectionCount;
|
||||
mProximityCorrectionCount = scoring->mProximityCorrectionCount;
|
||||
mCompletionCount = scoring->mCompletionCount;
|
||||
|
|
Loading…
Reference in a new issue