Merge DicNodeStatePrevWord into DicNodeStateOutput.

Before:
(0)  2232.70 (0.86%)
(1)  255258.50 (98.89%)
(2)  585.73 (0.23%)
(66)  0.26 (0.00%)
Total 258126.46 (sum of others 258077.18)

After:
(0)  2249.23 (0.93%)
(1)  239883.63 (98.83%)
(2)  554.82 (0.23%)
(66)  0.35 (0.00%)
Total 242734.38 (sum of others 242688.04)

Change-Id: I9760cae5b98b3d1f4804b6b60317887eaa3ff71c
This commit is contained in:
Keisuke Kuroyanagi 2014-03-25 18:07:09 +09:00
parent adfb262797
commit eddbb7ac88
7 changed files with 180 additions and 222 deletions

View file

@ -20,28 +20,33 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/dicnode/dic_node_profiler.h" #include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_release_listener.h" #include "suggest/core/dicnode/dic_node_release_listener.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dicnode/internal/dic_node_state.h" #include "suggest/core/dicnode/internal/dic_node_state.h"
#include "suggest/core/dicnode/internal/dic_node_properties.h" #include "suggest/core/dicnode/internal/dic_node_properties.h"
#include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
#if DEBUG_DICT #if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \ #define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \ do { \
char charBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \ AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \ __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0) getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \
} while (0)
#define DUMP_WORD_AND_SCORE(header) \ #define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \ do { \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ char charBuf[50]; \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), \ INTS_TO_CHARS(getOutputWordBuf(), \
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \ getNodeCodePointCount() \
NELEMS(prevWordCharBuf)); \ + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \ charBuf, NELEMS(charBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ getNormalizedCompoundDistance(), getRawLength(), charBuf, \
getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \ getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
} while (0) } while (0)
#else #else
@ -103,8 +108,8 @@ class DicNode {
void initByCopy(const DicNode *const dicNode) { void initByCopy(const DicNode *const dicNode) {
mIsUsed = true; mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties); mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties);
mDicNodeState.init(&dicNode->mDicNodeState); mDicNodeState.initByCopy(&dicNode->mDicNodeState);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
} }
@ -112,12 +117,8 @@ class DicNode {
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
mIsUsed = true; mIsUsed = true;
mIsCachedForNextSuggestion = false; mIsCachedForNextSuggestion = false;
mDicNodeProperties.init( mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos);
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, mDicNodeState.init();
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
mDicNodeState.init(prevWordPtNodePos);
PROF_NODE_RESET(mProfiler); PROF_NODE_RESET(mProfiler);
} }
@ -125,13 +126,8 @@ class DicNode {
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsUsed = true; mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init( mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos());
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
dicNode->mDicNodeProperties.getPtNodePos(),
dicNode->mDicNodeProperties.getDepth()); dicNode->mDicNodeProperties.getDepth());
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
} }
@ -141,7 +137,7 @@ class DicNode {
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
mDicNodeState.init(&parentDicNode->mDicNodeState); mDicNodeState.initByCopy(&parentDicNode->mDicNodeState);
PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
} }
@ -156,7 +152,7 @@ class DicNode {
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth); newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints); mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -200,7 +196,7 @@ class DicNode {
// Used to expand the node in DicNodeUtils // Used to expand the node in DicNodeUtils
int getNodeTypedCodePoint() const { int getNodeTypedCodePoint() const {
return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount()); return mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(getNodeCodePointCount());
} }
// Check if the current word and the previous word can be considered as a valid multiple word // Check if the current word and the previous word can be considered as a valid multiple word
@ -211,19 +207,19 @@ class DicNode {
} }
// Treat suggestion as invalid if the current and the previous word are single character // Treat suggestion as invalid if the current and the previous word are single character
// words. // words.
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1; - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
const int currentWordLen = getNodeCodePointCount(); const int currentWordLen = getNodeCodePointCount();
return (prevWordLen != 1 || currentWordLen != 1); return (prevWordLen != 1 || currentWordLen != 1);
} }
bool isFirstCharUppercase() const { bool isFirstCharUppercase() const {
const int c = mDicNodeState.mDicNodeStateOutput.getCodePointAt(0); const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0);
return CharUtils::isAsciiUpper(c); return CharUtils::isAsciiUpper(c);
} }
bool isFirstWord() const { bool isFirstWord() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; return mDicNodeProperties.getPrevWordTerminalPtNodePos() == NOT_A_DICT_POS;
} }
bool isCompletion(const int inputSize) const { bool isCompletion(const int inputSize) const {
@ -241,7 +237,7 @@ class DicNode {
// Used to get bigram probability in DicNodeUtils // Used to get bigram probability in DicNodeUtils
int getPrevWordTerminalPtNodePos() const { int getPrevWordTerminalPtNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); return mDicNodeProperties.getPrevWordTerminalPtNodePos();
} }
// Used in DicNodeUtils // Used in DicNodeUtils
@ -263,8 +259,8 @@ class DicNode {
bool shouldBeFilteredBySafetyNetForBigram() const { bool shouldBeFilteredBySafetyNetForBigram() const {
const uint16_t currentDepth = getNodeCodePointCount(); const uint16_t currentDepth = getNodeCodePointCount();
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1; - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1)); return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
} }
@ -277,7 +273,7 @@ class DicNode {
} }
bool isTotalInputSizeExceedingLimit() const { bool isTotalInputSizeExceedingLimit() const {
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); const int prevWordsLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
const int currentWordDepth = getNodeCodePointCount(); const int currentWordDepth = getNodeCodePointCount();
// TODO: 3 can be 2? Needs to be investigated. // TODO: 3 can be 2? Needs to be investigated.
// TODO: Have a const variable for 3 (or 2) // TODO: Have a const variable for 3 (or 2)
@ -285,25 +281,24 @@ class DicNode {
} }
void outputResult(int *dest) const { void outputResult(int *dest) const {
const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); const uint16_t prevWordLength = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
const uint16_t currentDepth = getNodeCodePointCount(); const uint16_t currentDepth = getNodeCodePointCount();
DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), memmove(dest, getOutputWordBuf(), (prevWordLength + currentDepth) * sizeof(dest[0]));
prevWordLength, getOutputWordBuf(), currentDepth, dest);
DUMP_WORD_AND_SCORE("OUTPUT"); DUMP_WORD_AND_SCORE("OUTPUT");
} }
// "Total" in this context (and other methods in this class) means the whole suggestion. When // "Total" in this context (and other methods in this class) means the whole suggestion. When
// this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
// the one that corresponds to the last word of the suggestion, and all the previous words // the one that corresponds to the last word of the suggestion, and all the previous words
// are concatenated together in mPrevWord - which contains a space at the end. // are concatenated together in mDicNodeStateOutput.
int getTotalNodeSpaceCount() const { int getTotalNodeSpaceCount() const {
if (isFirstWord()) return 0; if (isFirstWord()) return 0;
return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(),
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()); mDicNodeState.mDicNodeStateOutput.getPrevWordsLength());
} }
int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex();
if (inputIndex == NOT_AN_INDEX) { if (inputIndex == NOT_AN_INDEX) {
return NOT_AN_INDEX; return NOT_AN_INDEX;
} else { } else {
@ -312,7 +307,7 @@ class DicNode {
} }
bool hasMultipleWords() const { bool hasMultipleWords() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0; return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0;
} }
int getProximityCorrectionCount() const { int getProximityCorrectionCount() const {
@ -346,7 +341,7 @@ class DicNode {
// Used to commit input partially // Used to commit input partially
int getPrevWordPtNodePos() const { int getPrevWordPtNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); return mDicNodeProperties.getPrevWordTerminalPtNodePos();
} }
AK_FORCE_INLINE const int *getOutputWordBuf() const { AK_FORCE_INLINE const int *getOutputWordBuf() const {
@ -425,7 +420,7 @@ class DicNode {
float getLanguageDistanceRatePerWordForScoring() const { float getLanguageDistanceRatePerWordForScoring() const {
const float langDist = getLanguageDistanceForScoring(); const float langDist = getLanguageDistanceForScoring();
const float totalWordCount = const float totalWordCount =
static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1); static_cast<float>(mDicNodeState.mDicNodeStateOutput.getPrevWordCount() + 1);
return langDist / totalWordCount; return langDist / totalWordCount;
} }
@ -469,7 +464,7 @@ class DicNode {
// Returns code point count including spaces // Returns code point count including spaces
inline uint16_t getTotalNodeCodePointCount() const { inline uint16_t getTotalNodeCodePointCount() const {
return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
} }
AK_FORCE_INLINE void dump(const char *tag) const { AK_FORCE_INLINE void dump(const char *tag) const {
@ -516,8 +511,9 @@ class DicNode {
return depthDiff > 0; return depthDiff > 0;
} }
for (int i = 0; i < depth; ++i) { for (int i = 0; i < depth; ++i) {
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i); const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i); const int rightCodePoint =
right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
if (codePoint != rightCodePoint) { if (codePoint != rightCodePoint) {
return rightCodePoint > codePoint; return rightCodePoint > codePoint;
} }
@ -574,8 +570,8 @@ class DicNode {
} }
AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) { AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) { if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) {
mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex( mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex(
inputStateG->mInputIndex); inputStateG->mInputIndex);
} }
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId, mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,

View file

@ -29,16 +29,18 @@ namespace latinime {
class DicNodeProperties { class DicNodeProperties {
public: public:
AK_FORCE_INLINE DicNodeProperties() AK_FORCE_INLINE DicNodeProperties()
: mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
mDepth(0), mLeavingDepth(0) {} mIsTerminal(false), mHasChildrenPtNodes(false),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0),
mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}
~DicNodeProperties() {} ~DicNodeProperties() {}
// Should be called only once per DicNode is initialized. // Should be called only once per DicNode is initialized.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) { const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) {
mPtNodePos = pos; mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos; mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint; mDicNodeCodePoint = nodeCodePoint;
@ -48,10 +50,24 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth; mDepth = depth;
mLeavingDepth = leavingDepth; mLeavingDepth = leavingDepth;
mPrevWordTerminalPtNodePos = prevWordNodePos;
} }
// Init for copy // Init for root with prevWordPtNodePos which is used for bigram
void init(const DicNodeProperties *const dicNodeProp) { void init(const int rootPtNodeArrayPos, const int prevWordNodePos) {
mPtNodePos = NOT_A_DICT_POS;
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
mDicNodeCodePoint = NOT_A_CODE_POINT;
mProbability = NOT_A_PROBABILITY;
mIsTerminal = false;
mHasChildrenPtNodes = true;
mIsBlacklistedOrNotAWord = false;
mDepth = 0;
mLeavingDepth = 0;
mPrevWordTerminalPtNodePos = prevWordNodePos;
}
void initByCopy(const DicNodeProperties *const dicNodeProp) {
mPtNodePos = dicNodeProp->mPtNodePos; mPtNodePos = dicNodeProp->mPtNodePos;
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
@ -61,6 +77,7 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth; mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth; mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
} }
// Init as passing child // Init as passing child
@ -74,6 +91,7 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth; mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
} }
int getPtNodePos() const { int getPtNodePos() const {
@ -113,6 +131,10 @@ class DicNodeProperties {
return mIsBlacklistedOrNotAWord; return mIsBlacklistedOrNotAWord;
} }
int getPrevWordTerminalPtNodePos() const {
return mPrevWordTerminalPtNodePos;
}
private: private:
// Caution!!! // Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok // Use a default copy constructor and an assign operator because shallow copies are ok
@ -126,6 +148,7 @@ class DicNodeProperties {
bool mIsBlacklistedOrNotAWord; bool mIsBlacklistedOrNotAWord;
uint16_t mDepth; uint16_t mDepth;
uint16_t mLeavingDepth; uint16_t mLeavingDepth;
int mPrevWordTerminalPtNodePos;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H #endif // LATINIME_DIC_NODE_PROPERTIES_H

View file

@ -20,7 +20,6 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/dicnode/internal/dic_node_state_input.h" #include "suggest/core/dicnode/internal/dic_node_state_input.h"
#include "suggest/core/dicnode/internal/dic_node_state_output.h" #include "suggest/core/dicnode/internal/dic_node_state_output.h"
#include "suggest/core/dicnode/internal/dic_node_state_prevword.h"
#include "suggest/core/dicnode/internal/dic_node_state_scoring.h" #include "suggest/core/dicnode/internal/dic_node_state_scoring.h"
namespace latinime { namespace latinime {
@ -29,65 +28,50 @@ class DicNodeState {
public: public:
DicNodeStateInput mDicNodeStateInput; DicNodeStateInput mDicNodeStateInput;
DicNodeStateOutput mDicNodeStateOutput; DicNodeStateOutput mDicNodeStateOutput;
DicNodeStatePrevWord mDicNodeStatePrevWord;
DicNodeStateScoring mDicNodeStateScoring; DicNodeStateScoring mDicNodeStateScoring;
AK_FORCE_INLINE DicNodeState() AK_FORCE_INLINE DicNodeState()
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(), : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {}
mDicNodeStateScoring() {
}
~DicNodeState() {} ~DicNodeState() {}
DicNodeState &operator=(const DicNodeState& src) { DicNodeState &operator=(const DicNodeState& src) {
init(&src); initByCopy(&src);
return *this; return *this;
} }
DicNodeState(const DicNodeState& src) DicNodeState(const DicNodeState& src)
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(), : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {
mDicNodeStateScoring() { initByCopy(&src);
init(&src);
} }
// Init with prevWordPos // Init for root
void init(const int prevWordPos) { void init() {
mDicNodeStateInput.init(); mDicNodeStateInput.init();
mDicNodeStateOutput.init(); mDicNodeStateOutput.init();
mDicNodeStatePrevWord.init(prevWordPos);
mDicNodeStateScoring.init(); mDicNodeStateScoring.init();
} }
// Init with previous word. // Init with previous word.
void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState, void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState,
const int prevWordPos, const int prevWordCodePointCount) { const int prevWordCodePointCount) {
mDicNodeStateOutput.init(); // reset for next word mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput);
mDicNodeStateInput.init( mDicNodeStateInput.init(
&prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */); &prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */);
mDicNodeStateScoring.init(&prevWordDicNodeState->mDicNodeStateScoring); mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring);
mDicNodeStatePrevWord.init(
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordCount() + 1,
prevWordPos,
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordBuf(),
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordLength(),
prevWordDicNodeState->mDicNodeStateOutput.getCodePointBuf(),
prevWordCodePointCount,
prevWordDicNodeState->mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
prevWordDicNodeState->mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
} }
// Init by copy // Init by copy
AK_FORCE_INLINE void init(const DicNodeState *const src) { AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) {
mDicNodeStateInput.init(&src->mDicNodeStateInput); mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput);
mDicNodeStateOutput.init(&src->mDicNodeStateOutput); mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput);
mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord); mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring);
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
} }
// Init by copy and adding merged node code points. // Init by copy and adding merged node code points.
void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount, void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) { const int *const mergedNodeCodePoints) {
init(src); initByCopy(src);
mDicNodeStateOutput.addMergedNodeCodePoints( mDicNodeStateOutput.addMergedNodeCodePoints(
mergedNodeCodePointCount, mergedNodeCodePoints); mergedNodeCodePointCount, mergedNodeCodePoints);
} }

View file

@ -53,7 +53,7 @@ class DicNodeStateInput {
mTerminalDiffCost[pointerId] = terminalDiffCost; mTerminalDiffCost[pointerId] = terminalDiffCost;
} }
void init(const DicNodeStateInput *const src) { void initByCopy(const DicNodeStateInput *const src) {
init(src, false); init(src, false);
} }

View file

@ -25,24 +25,53 @@
namespace latinime { namespace latinime {
// Class to have information to be output. This can contain previous words when the suggestion
// is a multi-word suggestion.
class DicNodeStateOutput { class DicNodeStateOutput {
public: public:
DicNodeStateOutput() : mOutputtedCodePointCount(0) {} DicNodeStateOutput()
: mOutputtedCodePointCount(0), mCurrentWordStart(0), mPrevWordCount(0),
mPrevWordsLength(0), mPrevWordStart(0), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
~DicNodeStateOutput() {} ~DicNodeStateOutput() {}
// Init for root
void init() { void init() {
mOutputtedCodePointCount = 0; mOutputtedCodePointCount = 0;
mCodePointsBuf[0] = 0; mCurrentWordStart = 0;
mOutputCodePoints[0] = 0;
mPrevWordCount = 0;
mPrevWordsLength = 0;
mPrevWordStart = 0;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
} }
// Init for next word.
void init(const DicNodeStateOutput *const stateOutput) { void init(const DicNodeStateOutput *const stateOutput) {
memmove(mCodePointsBuf, stateOutput->mCodePointsBuf, mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount + 1;
stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0])); memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputCodePoints[stateOutput->mOutputtedCodePointCount] = KEYCODE_SPACE;
mCurrentWordStart = stateOutput->mOutputtedCodePointCount + 1;
mPrevWordCount = std::min(static_cast<int16_t>(stateOutput->mPrevWordCount + 1),
static_cast<int16_t>(MAX_RESULTS));
mPrevWordsLength = stateOutput->mOutputtedCodePointCount + 1;
mPrevWordStart = stateOutput->mCurrentWordStart;
mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
}
void initByCopy(const DicNodeStateOutput *const stateOutput) {
memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount; mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) { if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
mCodePointsBuf[mOutputtedCodePointCount] = 0; mOutputCodePoints[mOutputtedCodePointCount] = 0;
} }
mCurrentWordStart = stateOutput->mCurrentWordStart;
mPrevWordCount = stateOutput->mPrevWordCount;
mPrevWordsLength = stateOutput->mPrevWordsLength;
mPrevWordStart = stateOutput->mPrevWordStart;
mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
} }
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount, void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
@ -51,29 +80,72 @@ class DicNodeStateOutput {
const int additionalCodePointCount = std::min( const int additionalCodePointCount = std::min(
static_cast<int>(mergedNodeCodePointCount), static_cast<int>(mergedNodeCodePointCount),
MAX_WORD_LENGTH - mOutputtedCodePointCount); MAX_WORD_LENGTH - mOutputtedCodePointCount);
memmove(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints, memmove(&mOutputCodePoints[mOutputtedCodePointCount], mergedNodeCodePoints,
additionalCodePointCount * sizeof(mCodePointsBuf[0])); additionalCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputtedCodePointCount = static_cast<uint16_t>( mOutputtedCodePointCount = static_cast<uint16_t>(
mOutputtedCodePointCount + mergedNodeCodePointCount); mOutputtedCodePointCount + additionalCodePointCount);
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) { if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
mCodePointsBuf[mOutputtedCodePointCount] = 0; mOutputCodePoints[mOutputtedCodePointCount] = 0;
} }
} }
} }
int getCodePointAt(const int index) const { int getCurrentWordCodePointAt(const int index) const {
return mCodePointsBuf[index]; return mOutputCodePoints[mCurrentWordStart + index];
} }
const int *getCodePointBuf() const { const int *getCodePointBuf() const {
return mCodePointsBuf; return mOutputCodePoints;
}
void setSecondWordFirstInputIndex(const int inputIndex) {
mSecondWordFirstInputIndex = inputIndex;
}
int getSecondWordFirstInputIndex() const {
return mSecondWordFirstInputIndex;
}
// TODO: remove
int16_t getPrevWordsLength() const {
return mPrevWordsLength;
}
int16_t getPrevWordCount() const {
return mPrevWordCount;
}
int16_t getPrevWordStart() const {
return mPrevWordStart;
}
int getOutputCodePointAt(const int id) const {
return mOutputCodePoints[id];
} }
private: private:
DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput); DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput);
// When the DicNode represents "this is a pen":
// mOutputtedCodePointCount is 13, which is total code point count of "this is a pen" including
// spaces.
// mCurrentWordStart indicates the head of "pen", thus it is 10.
// This contains 3 previous words, "this", "is" and "a"; thus, mPrevWordCount is 3.
// mPrevWordsLength is length of "this is a ", which is 10.
// mPrevWordStart is the start index of "a"; thus, it is 8.
// mSecondWordFirstInputIndex is the first input index of "is".
uint16_t mOutputtedCodePointCount; uint16_t mOutputtedCodePointCount;
int mCodePointsBuf[MAX_WORD_LENGTH]; int mOutputCodePoints[MAX_WORD_LENGTH];
int16_t mCurrentWordStart;
// Previous word count in mOutputCodePoints.
int16_t mPrevWordCount;
// Total length of previous words in mOutputCodePoints. This is being used by the algorithm
// that may want to look at the previous word information.
int16_t mPrevWordsLength;
// Start index of the previous word in mOutputCodePoints. This is being used for auto commit.
int16_t mPrevWordStart;
int mSecondWordFirstInputIndex;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H #endif // LATINIME_DIC_NODE_STATE_OUTPUT_H

View file

@ -1,117 +0,0 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H
#define LATINIME_DIC_NODE_STATE_PREVWORD_H
#include <algorithm>
#include <cstring> // for memset() and memmove()
#include <stdint.h>
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/layout/proximity_info_state.h"
namespace latinime {
// Holds the "previous word(s)" part of a DicNode's state: the code points stored in mPrevWord,
// how many words and code points that buffer holds, where the most recent previous word starts,
// the PtNode position of the previous word, and the first input index of the second word.
class DicNodeStatePrevWord {
public:
// Starts with no previous words, no previous-word PtNode position and no second-word index.
// Note that mPrevWord itself is not initialized here.
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0),
mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
~DicNodeStatePrevWord() {}
// Resets to "no previous words" while remembering prevWordNodePos as the previous word's
// PtNode position. The first buffer element is set to 0 so that the buffer reads as empty.
void init(const int prevWordNodePos) {
mPrevWordLength = 0;
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordPtNodePos = prevWordNodePos;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
mPrevWord[0] = 0;
}
// Init by copy: takes every field from prevWord and copies only the first mPrevWordLength
// code points of its buffer (the rest of mPrevWord is left as it was).
AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) {
mPrevWordLength = prevWord->mPrevWordLength;
mPrevWordCount = prevWord->mPrevWordCount;
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memmove(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
}
// Init from two code point sequences: src0 (length0 code points) followed by src1 (length1
// code points), then a trailing KEYCODE_SPACE.
// prevWordCount is capped at MAX_RESULTS. lastInputIndex is not used by this method.
void init(const int16_t prevWordCount, const int prevWordNodePos, const int *const src0,
const int16_t length0, const int *const src1, const int16_t length1,
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = std::min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordPtNodePos = prevWordNodePos;
// NOTE(review): appendTwoWords is defined elsewhere; presumably it writes src0 followed by
// src1 into mPrevWord and returns the combined length -- confirm against DicNodeUtils.
int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
// Keep one slot free so that the trailing space below stays inside the buffer.
if (twoWordsLen >= MAX_WORD_LENGTH) {
twoWordsLen = MAX_WORD_LENGTH - 1;
}
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
// The most recent previous word (src1) begins right after the length0 code points of src0.
mPrevWordStart = length0;
// The stored length includes the trailing space.
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex;
}
// Records the first input index of the second word.
void setSecondWordFirstInputIndex(const int inputIndex) {
mSecondWordFirstInputIndex = inputIndex;
}
// Returns the recorded first input index of the second word, or NOT_AN_INDEX if init() /
// the constructor left it unset.
int getSecondWordFirstInputIndex() const {
return mSecondWordFirstInputIndex;
}
// TODO: remove
// Returns the number of code points held in mPrevWord (this includes the trailing space
// when set by the two-sequence init()).
int16_t getPrevWordLength() const {
return mPrevWordLength;
}
// Returns the number of previous words (never more than MAX_RESULTS when set by the
// two-sequence init()).
int16_t getPrevWordCount() const {
return mPrevWordCount;
}
// Returns the index in mPrevWord at which the most recent previous word starts.
int16_t getPrevWordStart() const {
return mPrevWordStart;
}
// Returns the PtNode position stored for the previous word, or NOT_A_DICT_POS if none was set.
int getPrevWordPtNodePos() const {
return mPrevWordPtNodePos;
}
// Returns the code point at index id of mPrevWord. No bounds check is performed.
int getPrevWordCodePointAt(const int id) const {
return mPrevWord[id];
}
// Returns a read-only pointer to the start of the previous-word buffer.
const int *getPrevWordBuf() const {
return mPrevWord;
}
private:
// NOTE(review): macro defined elsewhere; by its name it forbids copy construction and
// assignment, which is why copying goes through init(const DicNodeStatePrevWord *).
DISALLOW_COPY_AND_ASSIGN(DicNodeStatePrevWord);
// Number of previous words.
int16_t mPrevWordCount;
// Number of code points currently stored in mPrevWord.
int16_t mPrevWordLength;
// Start index of the most recent previous word in mPrevWord.
int16_t mPrevWordStart;
// PtNode position of the previous word; NOT_A_DICT_POS when there is none.
int mPrevWordPtNodePos;
// First input index of the second word; NOT_AN_INDEX when unset.
int mSecondWordFirstInputIndex;
// Code points of the previous word(s).
int mPrevWord[MAX_WORD_LENGTH];
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H

View file

@ -53,7 +53,7 @@ class DicNodeStateScoring {
mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR; mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
} }
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) { AK_FORCE_INLINE void initByCopy(const DicNodeStateScoring *const scoring) {
mEditCorrectionCount = scoring->mEditCorrectionCount; mEditCorrectionCount = scoring->mEditCorrectionCount;
mProximityCorrectionCount = scoring->mProximityCorrectionCount; mProximityCorrectionCount = scoring->mProximityCorrectionCount;
mCompletionCount = scoring->mCompletionCount; mCompletionCount = scoring->mCompletionCount;