Merge DicNodeStatePrevWord into DicNodeStateOutput.

Before:
(0)  2232.70 (0.86%)
(1)  255258.50 (98.89%)
(2)  585.73 (0.23%)
(66)  0.26 (0.00%)
Total 258126.46 (sum of others 258077.18)

After:
(0)  2249.23 (0.93%)
(1)  239883.63 (98.83%)
(2)  554.82 (0.23%)
(66)  0.35 (0.00%)
Total 242734.38 (sum of others 242688.04)

Change-Id: I9760cae5b98b3d1f4804b6b60317887eaa3ff71c
This commit is contained in:
Keisuke Kuroyanagi 2014-03-25 18:07:09 +09:00
parent adfb262797
commit eddbb7ac88
7 changed files with 180 additions and 222 deletions

View file

@ -20,29 +20,34 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_release_listener.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dicnode/internal/dic_node_state.h"
#include "suggest/core/dicnode/internal/dic_node_properties.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "utils/char_utils.h"
#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
do { \
char charBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \
} while (0)
#define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), \
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
NELEMS(prevWordCharBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
do { \
char charBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), \
getNodeCodePointCount() \
+ mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \
charBuf, NELEMS(charBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), charBuf, \
getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
} while (0)
#else
#define LOGI_SHOW_ADD_COST_PROP
@ -103,8 +108,8 @@ class DicNode {
void initByCopy(const DicNode *const dicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
mDicNodeState.init(&dicNode->mDicNodeState);
mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties);
mDicNodeState.initByCopy(&dicNode->mDicNodeState);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
@ -112,12 +117,8 @@ class DicNode {
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
mDicNodeState.init(prevWordPtNodePos);
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos);
mDicNodeState.init();
PROF_NODE_RESET(mProfiler);
}
@ -125,13 +126,8 @@ class DicNode {
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos());
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
dicNode->mDicNodeProperties.getPtNodePos(),
dicNode->mDicNodeProperties.getDepth());
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
@ -141,7 +137,7 @@ class DicNode {
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
mDicNodeState.init(&parentDicNode->mDicNodeState);
mDicNodeState.initByCopy(&parentDicNode->mDicNodeState);
PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
}
@ -156,7 +152,7 @@ class DicNode {
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth);
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -200,7 +196,7 @@ class DicNode {
// Used to expand the node in DicNodeUtils
int getNodeTypedCodePoint() const {
return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount());
return mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(getNodeCodePointCount());
}
// Check if the current word and the previous word can be considered as a valid multiple word
@ -211,19 +207,19 @@ class DicNode {
}
// Treat suggestion as invalid if the current and the previous word are single character
// words.
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
- mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
const int currentWordLen = getNodeCodePointCount();
return (prevWordLen != 1 || currentWordLen != 1);
}
bool isFirstCharUppercase() const {
const int c = mDicNodeState.mDicNodeStateOutput.getCodePointAt(0);
const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0);
return CharUtils::isAsciiUpper(c);
}
bool isFirstWord() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
return mDicNodeProperties.getPrevWordTerminalPtNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@ -241,7 +237,7 @@ class DicNode {
// Used to get bigram probability in DicNodeUtils
int getPrevWordTerminalPtNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
return mDicNodeProperties.getPrevWordTerminalPtNodePos();
}
// Used in DicNodeUtils
@ -263,8 +259,8 @@ class DicNode {
bool shouldBeFilteredBySafetyNetForBigram() const {
const uint16_t currentDepth = getNodeCodePointCount();
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
- mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
}
@ -277,7 +273,7 @@ class DicNode {
}
bool isTotalInputSizeExceedingLimit() const {
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
const int prevWordsLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
const int currentWordDepth = getNodeCodePointCount();
// TODO: 3 can be 2? Needs to be investigated.
// TODO: Have a const variable for 3 (or 2)
@ -285,25 +281,24 @@ class DicNode {
}
void outputResult(int *dest) const {
const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
const uint16_t prevWordLength = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
const uint16_t currentDepth = getNodeCodePointCount();
DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(),
prevWordLength, getOutputWordBuf(), currentDepth, dest);
memmove(dest, getOutputWordBuf(), (prevWordLength + currentDepth) * sizeof(dest[0]));
DUMP_WORD_AND_SCORE("OUTPUT");
}
// "Total" in this context (and other methods in this class) means the whole suggestion. When
// this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
// the one that corresponds to the last word of the suggestion, and all the previous words
// are concatenated together in mPrevWord - which contains a space at the end.
// are concatenated together in mDicNodeStateOutput.
int getTotalNodeSpaceCount() const {
if (isFirstWord()) return 0;
return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(),
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength());
return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(),
mDicNodeState.mDicNodeStateOutput.getPrevWordsLength());
}
int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex();
if (inputIndex == NOT_AN_INDEX) {
return NOT_AN_INDEX;
} else {
@ -312,7 +307,7 @@ class DicNode {
}
bool hasMultipleWords() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0;
}
int getProximityCorrectionCount() const {
@ -346,7 +341,7 @@ class DicNode {
// Used to commit input partially
int getPrevWordPtNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
return mDicNodeProperties.getPrevWordTerminalPtNodePos();
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
@ -425,7 +420,7 @@ class DicNode {
float getLanguageDistanceRatePerWordForScoring() const {
const float langDist = getLanguageDistanceForScoring();
const float totalWordCount =
static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1);
static_cast<float>(mDicNodeState.mDicNodeStateOutput.getPrevWordCount() + 1);
return langDist / totalWordCount;
}
@ -469,7 +464,7 @@ class DicNode {
// Returns code point count including spaces
inline uint16_t getTotalNodeCodePointCount() const {
return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
}
AK_FORCE_INLINE void dump(const char *tag) const {
@ -516,8 +511,9 @@ class DicNode {
return depthDiff > 0;
}
for (int i = 0; i < depth; ++i) {
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
const int rightCodePoint =
right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
if (codePoint != rightCodePoint) {
return rightCodePoint > codePoint;
}
@ -574,8 +570,8 @@ class DicNode {
}
AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) {
mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex(
if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) {
mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex(
inputStateG->mInputIndex);
}
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,

View file

@ -29,16 +29,18 @@ namespace latinime {
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
: mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
mDepth(0), mLeavingDepth(0) {}
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
mIsTerminal(false), mHasChildrenPtNodes(false),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0),
mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}
~DicNodeProperties() {}
// Should be called only once each time a DicNode is initialized.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) {
const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) {
mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint;
@ -48,10 +50,24 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
mPrevWordTerminalPtNodePos = prevWordNodePos;
}
// Init for copy
void init(const DicNodeProperties *const dicNodeProp) {
// Init for root with prevWordPtNodePos which is used for bigram
void init(const int rootPtNodeArrayPos, const int prevWordNodePos) {
mPtNodePos = NOT_A_DICT_POS;
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
mDicNodeCodePoint = NOT_A_CODE_POINT;
mProbability = NOT_A_PROBABILITY;
mIsTerminal = false;
mHasChildrenPtNodes = true;
mIsBlacklistedOrNotAWord = false;
mDepth = 0;
mLeavingDepth = 0;
mPrevWordTerminalPtNodePos = prevWordNodePos;
}
void initByCopy(const DicNodeProperties *const dicNodeProp) {
mPtNodePos = dicNodeProp->mPtNodePos;
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
@ -61,6 +77,7 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
}
// Init as passing child
@ -74,6 +91,7 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
}
int getPtNodePos() const {
@ -113,6 +131,10 @@ class DicNodeProperties {
return mIsBlacklistedOrNotAWord;
}
int getPrevWordTerminalPtNodePos() const {
return mPrevWordTerminalPtNodePos;
}
private:
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
@ -126,6 +148,7 @@ class DicNodeProperties {
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
int mPrevWordTerminalPtNodePos;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H

View file

@ -20,7 +20,6 @@
#include "defines.h"
#include "suggest/core/dicnode/internal/dic_node_state_input.h"
#include "suggest/core/dicnode/internal/dic_node_state_output.h"
#include "suggest/core/dicnode/internal/dic_node_state_prevword.h"
#include "suggest/core/dicnode/internal/dic_node_state_scoring.h"
namespace latinime {
@ -29,65 +28,50 @@ class DicNodeState {
public:
DicNodeStateInput mDicNodeStateInput;
DicNodeStateOutput mDicNodeStateOutput;
DicNodeStatePrevWord mDicNodeStatePrevWord;
DicNodeStateScoring mDicNodeStateScoring;
AK_FORCE_INLINE DicNodeState()
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
mDicNodeStateScoring() {
}
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {}
~DicNodeState() {}
DicNodeState &operator=(const DicNodeState& src) {
init(&src);
initByCopy(&src);
return *this;
}
DicNodeState(const DicNodeState& src)
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
mDicNodeStateScoring() {
init(&src);
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {
initByCopy(&src);
}
// Init with prevWordPos
void init(const int prevWordPos) {
// Init for root
void init() {
mDicNodeStateInput.init();
mDicNodeStateOutput.init();
mDicNodeStatePrevWord.init(prevWordPos);
mDicNodeStateScoring.init();
}
// Init with previous word.
void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState,
const int prevWordPos, const int prevWordCodePointCount) {
mDicNodeStateOutput.init(); // reset for next word
const int prevWordCodePointCount) {
mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput);
mDicNodeStateInput.init(
&prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */);
mDicNodeStateScoring.init(&prevWordDicNodeState->mDicNodeStateScoring);
mDicNodeStatePrevWord.init(
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordCount() + 1,
prevWordPos,
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordBuf(),
prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordLength(),
prevWordDicNodeState->mDicNodeStateOutput.getCodePointBuf(),
prevWordCodePointCount,
prevWordDicNodeState->mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
prevWordDicNodeState->mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring);
}
// Init by copy
AK_FORCE_INLINE void init(const DicNodeState *const src) {
mDicNodeStateInput.init(&src->mDicNodeStateInput);
mDicNodeStateOutput.init(&src->mDicNodeStateOutput);
mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord);
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) {
mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput);
mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput);
mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring);
}
// Init by copy and adding merged node code points.
void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
init(src);
initByCopy(src);
mDicNodeStateOutput.addMergedNodeCodePoints(
mergedNodeCodePointCount, mergedNodeCodePoints);
}

View file

@ -53,7 +53,7 @@ class DicNodeStateInput {
mTerminalDiffCost[pointerId] = terminalDiffCost;
}
void init(const DicNodeStateInput *const src) {
void initByCopy(const DicNodeStateInput *const src) {
init(src, false);
}

View file

@ -25,24 +25,53 @@
namespace latinime {
// Class to have information to be output. This can contain previous words when the suggestion
// is a multi-word suggestion.
class DicNodeStateOutput {
public:
DicNodeStateOutput() : mOutputtedCodePointCount(0) {}
DicNodeStateOutput()
: mOutputtedCodePointCount(0), mCurrentWordStart(0), mPrevWordCount(0),
mPrevWordsLength(0), mPrevWordStart(0), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
~DicNodeStateOutput() {}
// Init for root
void init() {
mOutputtedCodePointCount = 0;
mCodePointsBuf[0] = 0;
mCurrentWordStart = 0;
mOutputCodePoints[0] = 0;
mPrevWordCount = 0;
mPrevWordsLength = 0;
mPrevWordStart = 0;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
// Init for next word.
void init(const DicNodeStateOutput *const stateOutput) {
memmove(mCodePointsBuf, stateOutput->mCodePointsBuf,
stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount + 1;
memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputCodePoints[stateOutput->mOutputtedCodePointCount] = KEYCODE_SPACE;
mCurrentWordStart = stateOutput->mOutputtedCodePointCount + 1;
mPrevWordCount = std::min(static_cast<int16_t>(stateOutput->mPrevWordCount + 1),
static_cast<int16_t>(MAX_RESULTS));
mPrevWordsLength = stateOutput->mOutputtedCodePointCount + 1;
mPrevWordStart = stateOutput->mCurrentWordStart;
mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
}
void initByCopy(const DicNodeStateOutput *const stateOutput) {
memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
mCodePointsBuf[mOutputtedCodePointCount] = 0;
mOutputCodePoints[mOutputtedCodePointCount] = 0;
}
mCurrentWordStart = stateOutput->mCurrentWordStart;
mPrevWordCount = stateOutput->mPrevWordCount;
mPrevWordsLength = stateOutput->mPrevWordsLength;
mPrevWordStart = stateOutput->mPrevWordStart;
mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
}
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
@ -51,29 +80,72 @@ class DicNodeStateOutput {
const int additionalCodePointCount = std::min(
static_cast<int>(mergedNodeCodePointCount),
MAX_WORD_LENGTH - mOutputtedCodePointCount);
memmove(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
additionalCodePointCount * sizeof(mCodePointsBuf[0]));
memmove(&mOutputCodePoints[mOutputtedCodePointCount], mergedNodeCodePoints,
additionalCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputtedCodePointCount = static_cast<uint16_t>(
mOutputtedCodePointCount + mergedNodeCodePointCount);
mOutputtedCodePointCount + additionalCodePointCount);
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
mCodePointsBuf[mOutputtedCodePointCount] = 0;
mOutputCodePoints[mOutputtedCodePointCount] = 0;
}
}
}
int getCodePointAt(const int index) const {
return mCodePointsBuf[index];
int getCurrentWordCodePointAt(const int index) const {
return mOutputCodePoints[mCurrentWordStart + index];
}
const int *getCodePointBuf() const {
return mCodePointsBuf;
return mOutputCodePoints;
}
void setSecondWordFirstInputIndex(const int inputIndex) {
mSecondWordFirstInputIndex = inputIndex;
}
int getSecondWordFirstInputIndex() const {
return mSecondWordFirstInputIndex;
}
// TODO: remove
int16_t getPrevWordsLength() const {
return mPrevWordsLength;
}
int16_t getPrevWordCount() const {
return mPrevWordCount;
}
int16_t getPrevWordStart() const {
return mPrevWordStart;
}
int getOutputCodePointAt(const int id) const {
return mOutputCodePoints[id];
}
private:
DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput);
// When the DicNode represents "this is a pen":
// mOutputtedCodePointCount is 13, which is total code point count of "this is a pen" including
// spaces.
// mCurrentWordStart indicates the head of "pen", thus it is 10.
// This contains 3 previous words, "this", "is" and "a"; thus, mPrevWordCount is 3.
// mPrevWordsLength is length of "this is a ", which is 10.
// mPrevWordStart is the start index of "a"; thus, it is 8.
// mSecondWordFirstInputIndex is the first input index of "is".
uint16_t mOutputtedCodePointCount;
int mCodePointsBuf[MAX_WORD_LENGTH];
int mOutputCodePoints[MAX_WORD_LENGTH];
int16_t mCurrentWordStart;
// Previous word count in mOutputCodePoints.
int16_t mPrevWordCount;
// Total length of previous words in mOutputCodePoints. This is being used by the algorithm
// that may want to look at the previous word information.
int16_t mPrevWordsLength;
// Start index of the previous word in mOutputCodePoints. This is being used for auto commit.
int16_t mPrevWordStart;
int mSecondWordFirstInputIndex;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H

View file

@ -1,117 +0,0 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H
#define LATINIME_DIC_NODE_STATE_PREVWORD_H
#include <algorithm>
#include <cstring> // for memset() and memmove()
#include <stdint.h>
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/layout/proximity_info_state.h"
namespace latinime {
// Per-node record of the word(s) that precede the word currently being built.
//
// Stores the previous words' code points in mPrevWord, terminated by KEYCODE_SPACE when set
// through the two-source init(), together with a (clamped) previous-word count, the position
// of the previous word's PtNode and the first input index of the second word.
class DicNodeStatePrevWord {
 public:
    // Creates an empty state. Note that mPrevWord itself is left uninitialized here; only the
    // scalar members receive values.
    AK_FORCE_INLINE DicNodeStatePrevWord()
            : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0),
              mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}

    ~DicNodeStatePrevWord() {}

    // Resets to a state holding no previous-word code points, remembering only the PtNode
    // position of the previous word.
    void init(const int prevWordNodePos) {
        mPrevWord[0] = 0;
        mSecondWordFirstInputIndex = NOT_AN_INDEX;
        mPrevWordPtNodePos = prevWordNodePos;
        mPrevWordStart = 0;
        mPrevWordCount = 0;
        mPrevWordLength = 0;
    }

    // Copies every field from prevWord. Only the first mPrevWordLength code points of the
    // buffer are transferred.
    AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) {
        memmove(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
        mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
        mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
        mPrevWordStart = prevWord->mPrevWordStart;
        mPrevWordCount = prevWord->mPrevWordCount;
        mPrevWordLength = prevWord->mPrevWordLength;
    }

    // Builds the state from two code point sequences (src0/length0 followed by src1/length1).
    //
    // DicNodeUtils::appendTwoWords() fills mPrevWord and reports a length (presumably the
    // combined length of both sequences -- confirm against DicNodeUtils). A KEYCODE_SPACE is
    // then stored right after that content, or in the last slot of the buffer when the reported
    // length is MAX_WORD_LENGTH or more. prevWordCount is clamped to MAX_RESULTS.
    // lastInputIndex is accepted but not used by this method.
    void init(const int16_t prevWordCount, const int prevWordNodePos, const int *const src0,
            const int16_t length0, const int *const src1, const int16_t length1,
            const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
        const int appendedLength =
                DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
        // Keep one slot available for the trailing KEYCODE_SPACE.
        const int separatorIndex =
                (appendedLength >= MAX_WORD_LENGTH) ? (MAX_WORD_LENGTH - 1) : appendedLength;
        mPrevWord[separatorIndex] = KEYCODE_SPACE;
        mPrevWordLength = static_cast<int16_t>(separatorIndex + 1);
        // The most recent previous word begins where the first source sequence ended.
        mPrevWordStart = length0;
        mPrevWordCount = std::min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
        mPrevWordPtNodePos = prevWordNodePos;
        mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex;
    }

    void setSecondWordFirstInputIndex(const int inputIndex) {
        mSecondWordFirstInputIndex = inputIndex;
    }

    int getSecondWordFirstInputIndex() const {
        return mSecondWordFirstInputIndex;
    }

    // TODO: remove
    // Number of valid code points in mPrevWord as recorded by the init() methods.
    int16_t getPrevWordLength() const {
        return mPrevWordLength;
    }

    int16_t getPrevWordCount() const {
        return mPrevWordCount;
    }

    int16_t getPrevWordStart() const {
        return mPrevWordStart;
    }

    int getPrevWordPtNodePos() const {
        return mPrevWordPtNodePos;
    }

    // Returns the stored code point at index id. No bounds checking is performed.
    int getPrevWordCodePointAt(const int id) const {
        return mPrevWord[id];
    }

    const int *getPrevWordBuf() const {
        return mPrevWord;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(DicNodeStatePrevWord);

    // Previous word count, never larger than MAX_RESULTS when set via the two-source init().
    int16_t mPrevWordCount;
    // Number of code points in use in mPrevWord.
    int16_t mPrevWordLength;
    // Index in mPrevWord recorded as the start of the most recent previous word.
    int16_t mPrevWordStart;
    int mPrevWordPtNodePos;
    int mSecondWordFirstInputIndex;
    int mPrevWord[MAX_WORD_LENGTH];
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H

View file

@ -53,7 +53,7 @@ class DicNodeStateScoring {
mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
AK_FORCE_INLINE void initByCopy(const DicNodeStateScoring *const scoring) {
mEditCorrectionCount = scoring->mEditCorrectionCount;
mProximityCorrectionCount = scoring->mProximityCorrectionCount;
mCompletionCount = scoring->mCompletionCount;