Refactor CorrectionState to Correction

Change-Id: I5f1ce35413731f930b43b1c82014e65d9eaa240b
main
satok 2011-08-10 14:30:10 +09:00
parent 11b7febc0b
commit cfca3c6317
6 changed files with 93 additions and 93 deletions

View File

@ -14,7 +14,7 @@ LOCAL_SRC_FILES := \
jni/jni_common.cpp \ jni/jni_common.cpp \
src/bigram_dictionary.cpp \ src/bigram_dictionary.cpp \
src/char_utils.cpp \ src/char_utils.cpp \
src/correction_state.cpp \ src/correction.cpp \
src/dictionary.cpp \ src/dictionary.cpp \
src/proximity_info.cpp \ src/proximity_info.cpp \
src/unigram_dictionary.cpp src/unigram_dictionary.cpp

View File

@ -18,9 +18,9 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#define LOG_TAG "LatinIME: correction_state.cpp" #define LOG_TAG "LatinIME: correction.cpp"
#include "correction_state.h" #include "correction.h"
#include "proximity_info.h" #include "proximity_info.h"
namespace latinime { namespace latinime {
@ -30,20 +30,20 @@ namespace latinime {
////////////////////// //////////////////////
static const char QUOTE = '\''; static const char QUOTE = '\'';
inline bool CorrectionState::isQuote(const unsigned short c) { inline bool Correction::isQuote(const unsigned short c) {
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex); const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
return (c == QUOTE && userTypedChar != QUOTE); return (c == QUOTE && userTypedChar != QUOTE);
} }
///////////////////// ////////////////
// CorrectionState // // Correction //
///////////////////// ////////////////
CorrectionState::CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier) Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
: TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) { : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
} }
void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength, void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
const int maxDepth) { const int maxDepth) {
mProximityInfo = pi; mProximityInfo = pi;
mInputLength = inputLength; mInputLength = inputLength;
@ -52,7 +52,7 @@ void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inp
mSkippedOutputIndex = -1; mSkippedOutputIndex = -1;
} }
void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos, void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos) { const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
mSkipPos = skipPos; mSkipPos = skipPos;
mExcessivePos = excessivePos; mExcessivePos = excessivePos;
@ -61,7 +61,7 @@ void CorrectionState::setCorrectionParams(const int skipPos, const int excessive
mMissingSpacePos = missingSpacePos; mMissingSpacePos = missingSpacePos;
} }
void CorrectionState::checkState() { void Correction::checkState() {
if (DEBUG_DICT) { if (DEBUG_DICT) {
int inputCount = 0; int inputCount = 0;
if (mSkipPos >= 0) ++inputCount; if (mSkipPos >= 0) ++inputCount;
@ -72,11 +72,11 @@ void CorrectionState::checkState() {
} }
} }
int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) { int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
} }
int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
const int outputIndex = mTerminalOutputIndex; const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex; const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1; *wordLength = outputIndex + 1;
@ -86,11 +86,11 @@ int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wo
*word = mWord; *word = mWord;
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2) const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
: (mInputLength == inputIndex + 1); : (mInputLength == inputIndex + 1);
return CorrectionState::RankingAlgorithm::calculateFinalFreq( return Correction::RankingAlgorithm::calculateFinalFreq(
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this); inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
} }
void CorrectionState::initProcessState(const int matchCount, const int inputIndex, void Correction::initProcessState(const int matchCount, const int inputIndex,
const int outputIndex, const bool traverseAllNodes, const int diffs) { const int outputIndex, const bool traverseAllNodes, const int diffs) {
mMatchedCharCount = matchCount; mMatchedCharCount = matchCount;
mInputIndex = inputIndex; mInputIndex = inputIndex;
@ -99,7 +99,7 @@ void CorrectionState::initProcessState(const int matchCount, const int inputInde
mDiffs = diffs; mDiffs = diffs;
} }
void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex, void Correction::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
bool *traverseAllNodes, int *diffs) { bool *traverseAllNodes, int *diffs) {
*matchedCount = mMatchedCharCount; *matchedCount = mMatchedCharCount;
*inputIndex = mInputIndex; *inputIndex = mInputIndex;
@ -108,43 +108,43 @@ void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *o
*diffs = mDiffs; *diffs = mDiffs;
} }
void CorrectionState::charMatched() { void Correction::charMatched() {
++mMatchedCharCount; ++mMatchedCharCount;
} }
// TODO: remove // TODO: remove
int CorrectionState::getOutputIndex() { int Correction::getOutputIndex() {
return mOutputIndex; return mOutputIndex;
} }
// TODO: remove // TODO: remove
int CorrectionState::getInputIndex() { int Correction::getInputIndex() {
return mInputIndex; return mInputIndex;
} }
// TODO: remove // TODO: remove
bool CorrectionState::needsToTraverseAll() { bool Correction::needsToTraverseAll() {
return mTraverseAllNodes; return mTraverseAllNodes;
} }
void CorrectionState::incrementInputIndex() { void Correction::incrementInputIndex() {
++mInputIndex; ++mInputIndex;
} }
void CorrectionState::incrementOutputIndex() { void Correction::incrementOutputIndex() {
++mOutputIndex; ++mOutputIndex;
} }
void CorrectionState::startTraverseAll() { void Correction::startTraverseAll() {
mTraverseAllNodes = true; mTraverseAllNodes = true;
} }
bool CorrectionState::needsToPrune() const { bool Correction::needsToPrune() const {
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth) return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|| mDiffs > mMaxEditDistance); || mDiffs > mMaxEditDistance);
} }
CorrectionState::CorrectionStateType CorrectionState::processSkipChar( Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) {
mWord[mOutputIndex] = c; mWord[mOutputIndex] = c;
if (needsToTraverseAll() && isTerminal) { if (needsToTraverseAll() && isTerminal) {
@ -158,9 +158,9 @@ CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
} }
} }
CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState( Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) {
CorrectionStateType currentStateType = NOT_ON_TERMINAL; CorrectionType currentStateType = NOT_ON_TERMINAL;
// This has to be done for each virtual char (this forwards the "inputIndex" which // This has to be done for each virtual char (this forwards the "inputIndex" which
// is the index in the user-inputted chars, as read by proximity chars. // is the index in the user-inputted chars, as read by proximity chars.
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) { if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
@ -249,7 +249,7 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
return currentStateType; return currentStateType;
} }
CorrectionState::~CorrectionState() { Correction::~Correction() {
} }
///////////////////////// /////////////////////////
@ -302,17 +302,17 @@ inline static void multiplyRate(const int rate, int *freq) {
// RankingAlgorithm // // RankingAlgorithm //
////////////////////// //////////////////////
int CorrectionState::RankingAlgorithm::calculateFinalFreq( int Correction::RankingAlgorithm::calculateFinalFreq(
const int inputIndex, const int outputIndex, const int inputIndex, const int outputIndex,
const int matchCount, const int freq, const bool sameLength, const int matchCount, const int freq, const bool sameLength,
const CorrectionState* correctionState) { const Correction* correction) {
const int skipPos = correctionState->getSkipPos(); const int skipPos = correction->getSkipPos();
const int excessivePos = correctionState->getExcessivePos(); const int excessivePos = correction->getExcessivePos();
const int transposedPos = correctionState->getTransposedPos(); const int transposedPos = correction->getTransposedPos();
const int inputLength = correctionState->mInputLength; const int inputLength = correction->mInputLength;
const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correctionState->FULL_WORD_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correctionState->mProximityInfo; const ProximityInfo *proximityInfo = correction->mProximityInfo;
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
// TODO: Demote by edit distance // TODO: Demote by edit distance
@ -370,10 +370,10 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq(
return finalFreq; return finalFreq;
} }
int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords( int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
const int firstFreq, const int secondFreq, const CorrectionState* correctionState) { const int firstFreq, const int secondFreq, const Correction* correction) {
const int spaceProximityPos = correctionState->mSpaceProximityPos; const int spaceProximityPos = correction->mSpaceProximityPos;
const int missingSpacePos = correctionState->mMissingSpacePos; const int missingSpacePos = correction->mMissingSpacePos;
if (DEBUG_DICT) { if (DEBUG_DICT) {
int inputCount = 0; int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount; if (spaceProximityPos >= 0) ++inputCount;
@ -381,12 +381,12 @@ int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(
assert(inputCount <= 1); assert(inputCount <= 1);
} }
const bool isSpaceProximity = spaceProximityPos >= 0; const bool isSpaceProximity = spaceProximityPos >= 0;
const int inputLength = correctionState->mInputLength; const int inputLength = correction->mInputLength;
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
const int secondWordLength = isSpaceProximity const int secondWordLength = isSpaceProximity
? (inputLength - spaceProximityPos - 1) ? (inputLength - spaceProximityPos - 1)
: (inputLength - missingSpacePos); : (inputLength - missingSpacePos);
const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
if (firstWordLength == 0 || secondWordLength == 0) { if (firstWordLength == 0 || secondWordLength == 0) {
return 0; return 0;

View File

@ -14,8 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef LATINIME_CORRECTION_STATE_H #ifndef LATINIME_CORRECTION_H
#define LATINIME_CORRECTION_STATE_H #define LATINIME_CORRECTION_H
#include <stdint.h> #include <stdint.h>
@ -25,7 +25,7 @@ namespace latinime {
class ProximityInfo; class ProximityInfo;
class CorrectionState { class Correction {
public: public:
typedef enum { typedef enum {
@ -34,10 +34,10 @@ public:
UNRELATED, UNRELATED,
ON_TERMINAL, ON_TERMINAL,
NOT_ON_TERMINAL NOT_ON_TERMINAL
} CorrectionStateType; } CorrectionType;
CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier); Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
void initCorrectionState( void initCorrection(
const ProximityInfo *pi, const int inputLength, const int maxWordLength); const ProximityInfo *pi, const int inputLength, const int maxWordLength);
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
const int spaceProximityPos, const int missingSpacePos); const int spaceProximityPos, const int missingSpacePos);
@ -50,7 +50,7 @@ public:
int getInputIndex(); int getInputIndex();
bool needsToTraverseAll(); bool needsToTraverseAll();
virtual ~CorrectionState(); virtual ~Correction();
int getSpaceProximityPos() const { int getSpaceProximityPos() const {
return mSpaceProximityPos; return mSpaceProximityPos;
} }
@ -75,7 +75,7 @@ public:
int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
CorrectionStateType processCharAndCalcState(const int32_t c, const bool isTerminal); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
int getDiffs() const { int getDiffs() const {
return mDiffs; return mDiffs;
@ -117,16 +117,16 @@ private:
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
inline bool isQuote(const unsigned short c); inline bool isQuote(const unsigned short c);
inline CorrectionStateType processSkipChar(const int32_t c, const bool isTerminal); inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
class RankingAlgorithm { class RankingAlgorithm {
public: public:
static int calculateFinalFreq(const int inputIndex, const int depth, static int calculateFinalFreq(const int inputIndex, const int depth,
const int matchCount, const int freq, const bool sameLength, const int matchCount, const int freq, const bool sameLength,
const CorrectionState* correctionState); const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
const CorrectionState* correctionState); const Correction* correction);
}; };
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_CORRECTION_INFO_H #endif // LATINIME_CORRECTION_H

View File

@ -23,7 +23,7 @@
namespace latinime { namespace latinime {
class CorrectionState; class Correction;
class ProximityInfo { class ProximityInfo {
public: public:

View File

@ -48,11 +48,11 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("UnigramDictionary - constructor"); LOGI("UnigramDictionary - constructor");
} }
mCorrectionState = new CorrectionState(typedLetterMultiplier, fullWordMultiplier); mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
} }
UnigramDictionary::~UnigramDictionary() { UnigramDictionary::~UnigramDictionary() {
delete mCorrectionState; delete mCorrection;
} }
static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
@ -184,7 +184,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) assert(codesSize == mInputLength); if (DEBUG_DICT) assert(codesSize == mInputLength);
const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
mCorrectionState->initCorrectionState(mProximityInfo, mInputLength, maxDepth); mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
PROF_END(0); PROF_END(0);
PROF_START(1); PROF_START(1);
@ -237,7 +237,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("--- Suggest missing space characters %d", i); LOGI("--- Suggest missing space characters %d", i);
} }
getMissingSpaceWords(mInputLength, i, mCorrectionState); getMissingSpaceWords(mInputLength, i, mCorrection);
} }
} }
PROF_END(5); PROF_END(5);
@ -256,7 +256,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
i, x, y, proximityInfo->hasSpaceProximity(x, y)); i, x, y, proximityInfo->hasSpaceProximity(x, y));
} }
if (proximityInfo->hasSpaceProximity(x, y)) { if (proximityInfo->hasSpaceProximity(x, y)) {
getMistypedSpaceWords(mInputLength, i, mCorrectionState); getMistypedSpaceWords(mInputLength, i, mCorrection);
} }
} }
} }
@ -347,7 +347,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
assert(excessivePos < mInputLength); assert(excessivePos < mInputLength);
assert(missingPos < mInputLength); assert(missingPos < mInputLength);
} }
mCorrectionState->setCorrectionParams(skipPos, excessivePos, transposedPos, mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */); -1 /* spaceProximityPos */, -1 /* missingSpacePos */);
int rootPosition = ROOT_POS; int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position // Get the number of children of root, then increment the position
@ -368,13 +368,13 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
--mStackChildCount[depth]; --mStackChildCount[depth];
int siblingPos = mStackSiblingPos[depth]; int siblingPos = mStackSiblingPos[depth];
int firstChildPos; int firstChildPos;
mCorrectionState->initProcessState( mCorrection->initProcessState(
mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth], mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth],
mStackTraverseAll[depth], mStackDiffs[depth]); mStackTraverseAll[depth], mStackDiffs[depth]);
// needsToTraverseChildrenNodes should be false // needsToTraverseChildrenNodes should be false
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
mCorrectionState, &childCount, &firstChildPos, &siblingPos); mCorrection, &childCount, &firstChildPos, &siblingPos);
// Update next sibling pos // Update next sibling pos
mStackSiblingPos[depth] = siblingPos; mStackSiblingPos[depth] = siblingPos;
if (needsToTraverseChildrenNodes) { if (needsToTraverseChildrenNodes) {
@ -383,7 +383,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
mStackChildCount[depth] = childCount; mStackChildCount[depth] = childCount;
mStackSiblingPos[depth] = firstChildPos; mStackSiblingPos[depth] = firstChildPos;
mCorrectionState->getProcessState(&mStackMatchedCount[depth], mCorrection->getProcessState(&mStackMatchedCount[depth],
&mStackInputIndex[depth], &mStackOutputIndex[depth], &mStackInputIndex[depth], &mStackOutputIndex[depth],
&mStackTraverseAll[depth], &mStackDiffs[depth]); &mStackTraverseAll[depth], &mStackDiffs[depth]);
} }
@ -409,17 +409,17 @@ inline static void multiplyIntCapped(const int multiplier, int *base) {
} }
void UnigramDictionary::getMissingSpaceWords( void UnigramDictionary::getMissingSpaceWords(
const int inputLength, const int missingSpacePos, CorrectionState *correctionState) { const int inputLength, const int missingSpacePos, Correction *correction) {
correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos); -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos);
getSplitTwoWordsSuggestion(inputLength, correctionState); getSplitTwoWordsSuggestion(inputLength, correction);
} }
void UnigramDictionary::getMistypedSpaceWords( void UnigramDictionary::getMistypedSpaceWords(
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState) { const int inputLength, const int spaceProximityPos, Correction *correction) {
correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */); -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */);
getSplitTwoWordsSuggestion(inputLength, correctionState); getSplitTwoWordsSuggestion(inputLength, correction);
} }
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
@ -429,19 +429,19 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
} }
inline void UnigramDictionary::onTerminal(const int freq, CorrectionState *correctionState) { inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
int wordLength; int wordLength;
unsigned short* wordPointer; unsigned short* wordPointer;
const int finalFreq = correctionState->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) { if (finalFreq >= 0) {
addWord(wordPointer, wordLength, finalFreq); addWord(wordPointer, wordLength, finalFreq);
} }
} }
void UnigramDictionary::getSplitTwoWordsSuggestion( void UnigramDictionary::getSplitTwoWordsSuggestion(
const int inputLength, CorrectionState* correctionState) { const int inputLength, Correction* correction) {
const int spaceProximityPos = correctionState->getSpaceProximityPos(); const int spaceProximityPos = correction->getSpaceProximityPos();
const int missingSpacePos = correctionState->getMissingSpacePos(); const int missingSpacePos = correction->getMissingSpacePos();
if (DEBUG_DICT) { if (DEBUG_DICT) {
int inputCount = 0; int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount; if (spaceProximityPos >= 0) ++inputCount;
@ -485,7 +485,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i - firstWordLength - 1]; word[i] = mWord[i - firstWordLength - 1];
} }
const int pairFreq = mCorrectionState->getFreqForSplitTwoWords(firstFreq, secondFreq); const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq);
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
} }
@ -650,10 +650,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent. // given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos, inline bool UnigramDictionary::processCurrentNode(const int initialPos,
CorrectionState *correctionState, int *newCount, Correction *correction, int *newCount,
int *newChildrenPosition, int *nextSiblingPosition) { int *newChildrenPosition, int *nextSiblingPosition) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
correctionState->checkState(); correction->checkState();
} }
int pos = initialPos; int pos = initialPos;
@ -697,12 +697,12 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// If we are on the last char, this virtual node is a terminal if this node is. // If we are on the last char, this virtual node is a terminal if this node is.
const bool isTerminal = isLastChar && isTerminalNode; const bool isTerminal = isLastChar && isTerminalNode;
CorrectionState::CorrectionStateType stateType = correctionState->processCharAndCalcState( Correction::CorrectionType stateType = correction->processCharAndCalcState(
c, isTerminal); c, isTerminal);
if (stateType == CorrectionState::TRAVERSE_ALL_ON_TERMINAL if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == CorrectionState::ON_TERMINAL) { || stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true; needsToInvokeOnTerminal = true;
} else if (stateType == CorrectionState::UNRELATED) { } else if (stateType == Correction::UNRELATED) {
// We found that this is an unrelated character, so we should give up traversing // We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely. // this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining // However we may not be on the last virtual node yet so we skip the remaining
@ -730,7 +730,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually // The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char. // a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
onTerminal(freq, mCorrectionState); onTerminal(freq, mCorrection);
} }
// If there are more chars in this node, then this virtual node has children. // If there are more chars in this node, then this virtual node has children.
@ -751,7 +751,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
} }
// Optimization: Prune out words that are too long compared to how much was typed. // Optimization: Prune out words that are too long compared to how much was typed.
if (correctionState->needsToPrune()) { if (correction->needsToPrune()) {
pos = BinaryFormat::skipFrequency(flags, pos); pos = BinaryFormat::skipFrequency(flags, pos);
*nextSiblingPosition = *nextSiblingPosition =
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);

View File

@ -18,7 +18,7 @@
#define LATINIME_UNIGRAM_DICTIONARY_H #define LATINIME_UNIGRAM_DICTIONARY_H
#include <stdint.h> #include <stdint.h>
#include "correction_state.h" #include "correction.h"
#include "defines.h" #include "defines.h"
#include "proximity_info.h" #include "proximity_info.h"
@ -89,17 +89,17 @@ private:
void getSuggestionCandidates(const int skipPos, const int excessivePos, void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos); const int transposedPos);
bool addWord(unsigned short *word, int length, int frequency); bool addWord(unsigned short *word, int length, int frequency);
void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState); void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
void getMissingSpaceWords( void getMissingSpaceWords(
const int inputLength, const int missingSpacePos, CorrectionState *correctionState); const int inputLength, const int missingSpacePos, Correction *correction);
void getMistypedSpaceWords( void getMistypedSpaceWords(
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState); const int inputLength, const int spaceProximityPos, Correction *correction);
void onTerminal(const int freq, CorrectionState *correctionState); void onTerminal(const int freq, Correction *correction);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, bool processCurrentNode(const int initialPos,
CorrectionState *correctionState, int *newCount, Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition); int *newChildPosition, int *nextSiblingPosition);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
unsigned short *word); unsigned short *word);
@ -129,7 +129,7 @@ private:
int *mFrequencies; int *mFrequencies;
unsigned short *mOutputChars; unsigned short *mOutputChars;
ProximityInfo *mProximityInfo; ProximityInfo *mProximityInfo;
CorrectionState *mCorrectionState; Correction *mCorrection;
int mInputLength; int mInputLength;
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];