Refactor CorrectionState to Correction

Change-Id: I5f1ce35413731f930b43b1c82014e65d9eaa240b
main
satok 2011-08-10 14:30:10 +09:00
parent 11b7febc0b
commit cfca3c6317
6 changed files with 93 additions and 93 deletions

View File

@ -14,7 +14,7 @@ LOCAL_SRC_FILES := \
jni/jni_common.cpp \
src/bigram_dictionary.cpp \
src/char_utils.cpp \
src/correction_state.cpp \
src/correction.cpp \
src/dictionary.cpp \
src/proximity_info.cpp \
src/unigram_dictionary.cpp

View File

@ -18,9 +18,9 @@
#include <stdio.h>
#include <string.h>
#define LOG_TAG "LatinIME: correction_state.cpp"
#define LOG_TAG "LatinIME: correction.cpp"
#include "correction_state.h"
#include "correction.h"
#include "proximity_info.h"
namespace latinime {
@ -30,20 +30,20 @@ namespace latinime {
//////////////////////
static const char QUOTE = '\'';
inline bool CorrectionState::isQuote(const unsigned short c) {
inline bool Correction::isQuote(const unsigned short c) {
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
return (c == QUOTE && userTypedChar != QUOTE);
}
/////////////////////
// CorrectionState //
/////////////////////
////////////////
// Correction //
////////////////
CorrectionState::CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier)
Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
: TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
}
void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength,
void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
const int maxDepth) {
mProximityInfo = pi;
mInputLength = inputLength;
@ -52,7 +52,7 @@ void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inp
mSkippedOutputIndex = -1;
}
void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos,
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
mSkipPos = skipPos;
mExcessivePos = excessivePos;
@ -61,7 +61,7 @@ void CorrectionState::setCorrectionParams(const int skipPos, const int excessive
mMissingSpacePos = missingSpacePos;
}
void CorrectionState::checkState() {
void Correction::checkState() {
if (DEBUG_DICT) {
int inputCount = 0;
if (mSkipPos >= 0) ++inputCount;
@ -72,11 +72,11 @@ void CorrectionState::checkState() {
}
}
int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
}
int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
@ -86,11 +86,11 @@ int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wo
*word = mWord;
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
: (mInputLength == inputIndex + 1);
return CorrectionState::RankingAlgorithm::calculateFinalFreq(
return Correction::RankingAlgorithm::calculateFinalFreq(
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
}
void CorrectionState::initProcessState(const int matchCount, const int inputIndex,
void Correction::initProcessState(const int matchCount, const int inputIndex,
const int outputIndex, const bool traverseAllNodes, const int diffs) {
mMatchedCharCount = matchCount;
mInputIndex = inputIndex;
@ -99,7 +99,7 @@ void CorrectionState::initProcessState(const int matchCount, const int inputInde
mDiffs = diffs;
}
void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
void Correction::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
bool *traverseAllNodes, int *diffs) {
*matchedCount = mMatchedCharCount;
*inputIndex = mInputIndex;
@ -108,43 +108,43 @@ void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *o
*diffs = mDiffs;
}
void CorrectionState::charMatched() {
void Correction::charMatched() {
++mMatchedCharCount;
}
// TODO: remove
int CorrectionState::getOutputIndex() {
int Correction::getOutputIndex() {
return mOutputIndex;
}
// TODO: remove
int CorrectionState::getInputIndex() {
int Correction::getInputIndex() {
return mInputIndex;
}
// TODO: remove
bool CorrectionState::needsToTraverseAll() {
bool Correction::needsToTraverseAll() {
return mTraverseAllNodes;
}
void CorrectionState::incrementInputIndex() {
void Correction::incrementInputIndex() {
++mInputIndex;
}
void CorrectionState::incrementOutputIndex() {
void Correction::incrementOutputIndex() {
++mOutputIndex;
}
void CorrectionState::startTraverseAll() {
void Correction::startTraverseAll() {
mTraverseAllNodes = true;
}
bool CorrectionState::needsToPrune() const {
bool Correction::needsToPrune() const {
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|| mDiffs > mMaxEditDistance);
}
CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal) {
mWord[mOutputIndex] = c;
if (needsToTraverseAll() && isTerminal) {
@ -158,9 +158,9 @@ CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
}
}
CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
CorrectionStateType currentStateType = NOT_ON_TERMINAL;
CorrectionType currentStateType = NOT_ON_TERMINAL;
// This has to be done for each virtual char (this forwards the "inputIndex", which
// is the index in the user-inputted chars, as read by proximity chars).
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
@ -249,7 +249,7 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
return currentStateType;
}
CorrectionState::~CorrectionState() {
Correction::~Correction() {
}
/////////////////////////
@ -302,17 +302,17 @@ inline static void multiplyRate(const int rate, int *freq) {
// RankingAlgorithm //
//////////////////////
int CorrectionState::RankingAlgorithm::calculateFinalFreq(
int Correction::RankingAlgorithm::calculateFinalFreq(
const int inputIndex, const int outputIndex,
const int matchCount, const int freq, const bool sameLength,
const CorrectionState* correctionState) {
const int skipPos = correctionState->getSkipPos();
const int excessivePos = correctionState->getExcessivePos();
const int transposedPos = correctionState->getTransposedPos();
const int inputLength = correctionState->mInputLength;
const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correctionState->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correctionState->mProximityInfo;
const Correction* correction) {
const int skipPos = correction->getSkipPos();
const int excessivePos = correction->getExcessivePos();
const int transposedPos = correction->getTransposedPos();
const int inputLength = correction->mInputLength;
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correction->mProximityInfo;
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
// TODO: Demote by edit distance
@ -370,10 +370,10 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq(
return finalFreq;
}
int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(
const int firstFreq, const int secondFreq, const CorrectionState* correctionState) {
const int spaceProximityPos = correctionState->mSpaceProximityPos;
const int missingSpacePos = correctionState->mMissingSpacePos;
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
const int firstFreq, const int secondFreq, const Correction* correction) {
const int spaceProximityPos = correction->mSpaceProximityPos;
const int missingSpacePos = correction->mMissingSpacePos;
if (DEBUG_DICT) {
int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount;
@ -381,12 +381,12 @@ int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(
assert(inputCount <= 1);
}
const bool isSpaceProximity = spaceProximityPos >= 0;
const int inputLength = correctionState->mInputLength;
const int inputLength = correction->mInputLength;
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
const int secondWordLength = isSpaceProximity
? (inputLength - spaceProximityPos - 1)
: (inputLength - missingSpacePos);
const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER;
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
if (firstWordLength == 0 || secondWordLength == 0) {
return 0;

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef LATINIME_CORRECTION_STATE_H
#define LATINIME_CORRECTION_STATE_H
#ifndef LATINIME_CORRECTION_H
#define LATINIME_CORRECTION_H
#include <stdint.h>
@ -25,7 +25,7 @@ namespace latinime {
class ProximityInfo;
class CorrectionState {
class Correction {
public:
typedef enum {
@ -34,10 +34,10 @@ public:
UNRELATED,
ON_TERMINAL,
NOT_ON_TERMINAL
} CorrectionStateType;
} CorrectionType;
CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier);
void initCorrectionState(
Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
void initCorrection(
const ProximityInfo *pi, const int inputLength, const int maxWordLength);
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
const int spaceProximityPos, const int missingSpacePos);
@ -50,7 +50,7 @@ public:
int getInputIndex();
bool needsToTraverseAll();
virtual ~CorrectionState();
virtual ~Correction();
// Read-only accessor: returns the value currently held in mSpaceProximityPos.
int getSpaceProximityPos() const { return mSpaceProximityPos; }
@ -75,7 +75,7 @@ public:
int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
CorrectionStateType processCharAndCalcState(const int32_t c, const bool isTerminal);
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
int getDiffs() const {
return mDiffs;
@ -117,16 +117,16 @@ private:
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
inline bool isQuote(const unsigned short c);
inline CorrectionStateType processSkipChar(const int32_t c, const bool isTerminal);
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
class RankingAlgorithm {
public:
static int calculateFinalFreq(const int inputIndex, const int depth,
const int matchCount, const int freq, const bool sameLength,
const CorrectionState* correctionState);
const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
const CorrectionState* correctionState);
const Correction* correction);
};
};
} // namespace latinime
#endif // LATINIME_CORRECTION_INFO_H
#endif // LATINIME_CORRECTION_H

View File

@ -23,7 +23,7 @@
namespace latinime {
class CorrectionState;
class Correction;
class ProximityInfo {
public:

View File

@ -48,11 +48,11 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
if (DEBUG_DICT) {
LOGI("UnigramDictionary - constructor");
}
mCorrectionState = new CorrectionState(typedLetterMultiplier, fullWordMultiplier);
mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
}
UnigramDictionary::~UnigramDictionary() {
delete mCorrectionState;
delete mCorrection;
}
static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
@ -184,7 +184,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) assert(codesSize == mInputLength);
const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
mCorrectionState->initCorrectionState(mProximityInfo, mInputLength, maxDepth);
mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
PROF_END(0);
PROF_START(1);
@ -237,7 +237,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) {
LOGI("--- Suggest missing space characters %d", i);
}
getMissingSpaceWords(mInputLength, i, mCorrectionState);
getMissingSpaceWords(mInputLength, i, mCorrection);
}
}
PROF_END(5);
@ -256,7 +256,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
i, x, y, proximityInfo->hasSpaceProximity(x, y));
}
if (proximityInfo->hasSpaceProximity(x, y)) {
getMistypedSpaceWords(mInputLength, i, mCorrectionState);
getMistypedSpaceWords(mInputLength, i, mCorrection);
}
}
}
@ -347,7 +347,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
assert(excessivePos < mInputLength);
assert(missingPos < mInputLength);
}
mCorrectionState->setCorrectionParams(skipPos, excessivePos, transposedPos,
mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
@ -368,13 +368,13 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
--mStackChildCount[depth];
int siblingPos = mStackSiblingPos[depth];
int firstChildPos;
mCorrectionState->initProcessState(
mCorrection->initProcessState(
mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth],
mStackTraverseAll[depth], mStackDiffs[depth]);
// needsToTraverseChildrenNodes should be false
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
mCorrectionState, &childCount, &firstChildPos, &siblingPos);
mCorrection, &childCount, &firstChildPos, &siblingPos);
// Update next sibling pos
mStackSiblingPos[depth] = siblingPos;
if (needsToTraverseChildrenNodes) {
@ -383,7 +383,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
mStackChildCount[depth] = childCount;
mStackSiblingPos[depth] = firstChildPos;
mCorrectionState->getProcessState(&mStackMatchedCount[depth],
mCorrection->getProcessState(&mStackMatchedCount[depth],
&mStackInputIndex[depth], &mStackOutputIndex[depth],
&mStackTraverseAll[depth], &mStackDiffs[depth]);
}
@ -409,17 +409,17 @@ inline static void multiplyIntCapped(const int multiplier, int *base) {
}
void UnigramDictionary::getMissingSpaceWords(
const int inputLength, const int missingSpacePos, CorrectionState *correctionState) {
correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
const int inputLength, const int missingSpacePos, Correction *correction) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos);
getSplitTwoWordsSuggestion(inputLength, correctionState);
getSplitTwoWordsSuggestion(inputLength, correction);
}
void UnigramDictionary::getMistypedSpaceWords(
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState) {
correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
const int inputLength, const int spaceProximityPos, Correction *correction) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */);
getSplitTwoWordsSuggestion(inputLength, correctionState);
getSplitTwoWordsSuggestion(inputLength, correction);
}
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
@ -429,19 +429,19 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
}
inline void UnigramDictionary::onTerminal(const int freq, CorrectionState *correctionState) {
inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correctionState->getFinalFreq(freq, &wordPointer, &wordLength);
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) {
addWord(wordPointer, wordLength, finalFreq);
}
}
void UnigramDictionary::getSplitTwoWordsSuggestion(
const int inputLength, CorrectionState* correctionState) {
const int spaceProximityPos = correctionState->getSpaceProximityPos();
const int missingSpacePos = correctionState->getMissingSpacePos();
const int inputLength, Correction* correction) {
const int spaceProximityPos = correction->getSpaceProximityPos();
const int missingSpacePos = correction->getMissingSpacePos();
if (DEBUG_DICT) {
int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount;
@ -485,7 +485,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i - firstWordLength - 1];
}
const int pairFreq = mCorrectionState->getFreqForSplitTwoWords(firstFreq, secondFreq);
const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq);
if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
}
@ -650,10 +650,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
CorrectionState *correctionState, int *newCount,
Correction *correction, int *newCount,
int *newChildrenPosition, int *nextSiblingPosition) {
if (DEBUG_DICT) {
correctionState->checkState();
correction->checkState();
}
int pos = initialPos;
@ -697,12 +697,12 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// If we are on the last char, this virtual node is a terminal if this node is.
const bool isTerminal = isLastChar && isTerminalNode;
CorrectionState::CorrectionStateType stateType = correctionState->processCharAndCalcState(
Correction::CorrectionType stateType = correction->processCharAndCalcState(
c, isTerminal);
if (stateType == CorrectionState::TRAVERSE_ALL_ON_TERMINAL
|| stateType == CorrectionState::ON_TERMINAL) {
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true;
} else if (stateType == CorrectionState::UNRELATED) {
} else if (stateType == Correction::UNRELATED) {
// We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining
@ -730,7 +730,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
onTerminal(freq, mCorrectionState);
onTerminal(freq, mCorrection);
}
// If there are more chars in this node, then this virtual node has children.
@ -751,7 +751,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
}
// Optimization: Prune out words that are too long compared to how much was typed.
if (correctionState->needsToPrune()) {
if (correction->needsToPrune()) {
pos = BinaryFormat::skipFrequency(flags, pos);
*nextSiblingPosition =
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);

View File

@ -18,7 +18,7 @@
#define LATINIME_UNIGRAM_DICTIONARY_H
#include <stdint.h>
#include "correction_state.h"
#include "correction.h"
#include "defines.h"
#include "proximity_info.h"
@ -89,17 +89,17 @@ private:
void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos);
bool addWord(unsigned short *word, int length, int frequency);
void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState);
void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
void getMissingSpaceWords(
const int inputLength, const int missingSpacePos, CorrectionState *correctionState);
const int inputLength, const int missingSpacePos, Correction *correction);
void getMistypedSpaceWords(
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState);
void onTerminal(const int freq, CorrectionState *correctionState);
const int inputLength, const int spaceProximityPos, Correction *correction);
void onTerminal(const int freq, Correction *correction);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos,
CorrectionState *correctionState, int *newCount,
Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
unsigned short *word);
@ -129,7 +129,7 @@ private:
int *mFrequencies;
unsigned short *mOutputChars;
ProximityInfo *mProximityInfo;
CorrectionState *mCorrectionState;
Correction *mCorrection;
int mInputLength;
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];