Refactor CorrectionState to Correction
Change-Id: I5f1ce35413731f930b43b1c82014e65d9eaa240b
main
parent
11b7febc0b
commit
cfca3c6317
|
@ -14,7 +14,7 @@ LOCAL_SRC_FILES := \
|
||||||
jni/jni_common.cpp \
|
jni/jni_common.cpp \
|
||||||
src/bigram_dictionary.cpp \
|
src/bigram_dictionary.cpp \
|
||||||
src/char_utils.cpp \
|
src/char_utils.cpp \
|
||||||
src/correction_state.cpp \
|
src/correction.cpp \
|
||||||
src/dictionary.cpp \
|
src/dictionary.cpp \
|
||||||
src/proximity_info.cpp \
|
src/proximity_info.cpp \
|
||||||
src/unigram_dictionary.cpp
|
src/unigram_dictionary.cpp
|
||||||
|
|
|
@ -18,9 +18,9 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#define LOG_TAG "LatinIME: correction_state.cpp"
|
#define LOG_TAG "LatinIME: correction.cpp"
|
||||||
|
|
||||||
#include "correction_state.h"
|
#include "correction.h"
|
||||||
#include "proximity_info.h"
|
#include "proximity_info.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -30,20 +30,20 @@ namespace latinime {
|
||||||
//////////////////////
|
//////////////////////
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
|
|
||||||
inline bool CorrectionState::isQuote(const unsigned short c) {
|
inline bool Correction::isQuote(const unsigned short c) {
|
||||||
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
|
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
|
||||||
return (c == QUOTE && userTypedChar != QUOTE);
|
return (c == QUOTE && userTypedChar != QUOTE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////
|
////////////////
|
||||||
// CorrectionState //
|
// Correction //
|
||||||
/////////////////////
|
////////////////
|
||||||
|
|
||||||
CorrectionState::CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier)
|
Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
|
||||||
: TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
|
: TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength,
|
void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
|
||||||
const int maxDepth) {
|
const int maxDepth) {
|
||||||
mProximityInfo = pi;
|
mProximityInfo = pi;
|
||||||
mInputLength = inputLength;
|
mInputLength = inputLength;
|
||||||
|
@ -52,7 +52,7 @@ void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inp
|
||||||
mSkippedOutputIndex = -1;
|
mSkippedOutputIndex = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos,
|
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
|
const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
|
||||||
mSkipPos = skipPos;
|
mSkipPos = skipPos;
|
||||||
mExcessivePos = excessivePos;
|
mExcessivePos = excessivePos;
|
||||||
|
@ -61,7 +61,7 @@ void CorrectionState::setCorrectionParams(const int skipPos, const int excessive
|
||||||
mMissingSpacePos = missingSpacePos;
|
mMissingSpacePos = missingSpacePos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::checkState() {
|
void Correction::checkState() {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
int inputCount = 0;
|
int inputCount = 0;
|
||||||
if (mSkipPos >= 0) ++inputCount;
|
if (mSkipPos >= 0) ++inputCount;
|
||||||
|
@ -72,11 +72,11 @@ void CorrectionState::checkState() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
|
int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
|
||||||
return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
|
return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
||||||
const int outputIndex = mTerminalOutputIndex;
|
const int outputIndex = mTerminalOutputIndex;
|
||||||
const int inputIndex = mTerminalInputIndex;
|
const int inputIndex = mTerminalInputIndex;
|
||||||
*wordLength = outputIndex + 1;
|
*wordLength = outputIndex + 1;
|
||||||
|
@ -86,11 +86,11 @@ int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wo
|
||||||
*word = mWord;
|
*word = mWord;
|
||||||
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
|
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
|
||||||
: (mInputLength == inputIndex + 1);
|
: (mInputLength == inputIndex + 1);
|
||||||
return CorrectionState::RankingAlgorithm::calculateFinalFreq(
|
return Correction::RankingAlgorithm::calculateFinalFreq(
|
||||||
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
|
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::initProcessState(const int matchCount, const int inputIndex,
|
void Correction::initProcessState(const int matchCount, const int inputIndex,
|
||||||
const int outputIndex, const bool traverseAllNodes, const int diffs) {
|
const int outputIndex, const bool traverseAllNodes, const int diffs) {
|
||||||
mMatchedCharCount = matchCount;
|
mMatchedCharCount = matchCount;
|
||||||
mInputIndex = inputIndex;
|
mInputIndex = inputIndex;
|
||||||
|
@ -99,7 +99,7 @@ void CorrectionState::initProcessState(const int matchCount, const int inputInde
|
||||||
mDiffs = diffs;
|
mDiffs = diffs;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
|
void Correction::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
|
||||||
bool *traverseAllNodes, int *diffs) {
|
bool *traverseAllNodes, int *diffs) {
|
||||||
*matchedCount = mMatchedCharCount;
|
*matchedCount = mMatchedCharCount;
|
||||||
*inputIndex = mInputIndex;
|
*inputIndex = mInputIndex;
|
||||||
|
@ -108,43 +108,43 @@ void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *o
|
||||||
*diffs = mDiffs;
|
*diffs = mDiffs;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::charMatched() {
|
void Correction::charMatched() {
|
||||||
++mMatchedCharCount;
|
++mMatchedCharCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
int CorrectionState::getOutputIndex() {
|
int Correction::getOutputIndex() {
|
||||||
return mOutputIndex;
|
return mOutputIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
int CorrectionState::getInputIndex() {
|
int Correction::getInputIndex() {
|
||||||
return mInputIndex;
|
return mInputIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
bool CorrectionState::needsToTraverseAll() {
|
bool Correction::needsToTraverseAll() {
|
||||||
return mTraverseAllNodes;
|
return mTraverseAllNodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::incrementInputIndex() {
|
void Correction::incrementInputIndex() {
|
||||||
++mInputIndex;
|
++mInputIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::incrementOutputIndex() {
|
void Correction::incrementOutputIndex() {
|
||||||
++mOutputIndex;
|
++mOutputIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorrectionState::startTraverseAll() {
|
void Correction::startTraverseAll() {
|
||||||
mTraverseAllNodes = true;
|
mTraverseAllNodes = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CorrectionState::needsToPrune() const {
|
bool Correction::needsToPrune() const {
|
||||||
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|
||||||
|| mDiffs > mMaxEditDistance);
|
|| mDiffs > mMaxEditDistance);
|
||||||
}
|
}
|
||||||
|
|
||||||
CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
|
Correction::CorrectionType Correction::processSkipChar(
|
||||||
const int32_t c, const bool isTerminal) {
|
const int32_t c, const bool isTerminal) {
|
||||||
mWord[mOutputIndex] = c;
|
mWord[mOutputIndex] = c;
|
||||||
if (needsToTraverseAll() && isTerminal) {
|
if (needsToTraverseAll() && isTerminal) {
|
||||||
|
@ -158,9 +158,9 @@ CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
const int32_t c, const bool isTerminal) {
|
const int32_t c, const bool isTerminal) {
|
||||||
CorrectionStateType currentStateType = NOT_ON_TERMINAL;
|
CorrectionType currentStateType = NOT_ON_TERMINAL;
|
||||||
// This has to be done for each virtual char (this forwards the "inputIndex" which
|
// This has to be done for each virtual char (this forwards the "inputIndex" which
|
||||||
// is the index in the user-inputted chars, as read by proximity chars.
|
// is the index in the user-inputted chars, as read by proximity chars.
|
||||||
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
|
if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
|
||||||
|
@ -249,7 +249,7 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
|
||||||
return currentStateType;
|
return currentStateType;
|
||||||
}
|
}
|
||||||
|
|
||||||
CorrectionState::~CorrectionState() {
|
Correction::~Correction() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
|
@ -302,17 +302,17 @@ inline static void multiplyRate(const int rate, int *freq) {
|
||||||
// RankingAlgorithm //
|
// RankingAlgorithm //
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
|
||||||
int CorrectionState::RankingAlgorithm::calculateFinalFreq(
|
int Correction::RankingAlgorithm::calculateFinalFreq(
|
||||||
const int inputIndex, const int outputIndex,
|
const int inputIndex, const int outputIndex,
|
||||||
const int matchCount, const int freq, const bool sameLength,
|
const int matchCount, const int freq, const bool sameLength,
|
||||||
const CorrectionState* correctionState) {
|
const Correction* correction) {
|
||||||
const int skipPos = correctionState->getSkipPos();
|
const int skipPos = correction->getSkipPos();
|
||||||
const int excessivePos = correctionState->getExcessivePos();
|
const int excessivePos = correction->getExcessivePos();
|
||||||
const int transposedPos = correctionState->getTransposedPos();
|
const int transposedPos = correction->getTransposedPos();
|
||||||
const int inputLength = correctionState->mInputLength;
|
const int inputLength = correction->mInputLength;
|
||||||
const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
const int fullWordMultiplier = correctionState->FULL_WORD_MULTIPLIER;
|
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
||||||
const ProximityInfo *proximityInfo = correctionState->mProximityInfo;
|
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
||||||
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
||||||
|
|
||||||
// TODO: Demote by edit distance
|
// TODO: Demote by edit distance
|
||||||
|
@ -370,10 +370,10 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq(
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(
|
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
|
||||||
const int firstFreq, const int secondFreq, const CorrectionState* correctionState) {
|
const int firstFreq, const int secondFreq, const Correction* correction) {
|
||||||
const int spaceProximityPos = correctionState->mSpaceProximityPos;
|
const int spaceProximityPos = correction->mSpaceProximityPos;
|
||||||
const int missingSpacePos = correctionState->mMissingSpacePos;
|
const int missingSpacePos = correction->mMissingSpacePos;
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
int inputCount = 0;
|
int inputCount = 0;
|
||||||
if (spaceProximityPos >= 0) ++inputCount;
|
if (spaceProximityPos >= 0) ++inputCount;
|
||||||
|
@ -381,12 +381,12 @@ int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(
|
||||||
assert(inputCount <= 1);
|
assert(inputCount <= 1);
|
||||||
}
|
}
|
||||||
const bool isSpaceProximity = spaceProximityPos >= 0;
|
const bool isSpaceProximity = spaceProximityPos >= 0;
|
||||||
const int inputLength = correctionState->mInputLength;
|
const int inputLength = correction->mInputLength;
|
||||||
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
|
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
|
||||||
const int secondWordLength = isSpaceProximity
|
const int secondWordLength = isSpaceProximity
|
||||||
? (inputLength - spaceProximityPos - 1)
|
? (inputLength - spaceProximityPos - 1)
|
||||||
: (inputLength - missingSpacePos);
|
: (inputLength - missingSpacePos);
|
||||||
const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
|
|
||||||
if (firstWordLength == 0 || secondWordLength == 0) {
|
if (firstWordLength == 0 || secondWordLength == 0) {
|
||||||
return 0;
|
return 0;
|
|
@ -14,8 +14,8 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LATINIME_CORRECTION_STATE_H
|
#ifndef LATINIME_CORRECTION_H
|
||||||
#define LATINIME_CORRECTION_STATE_H
|
#define LATINIME_CORRECTION_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ namespace latinime {
|
||||||
|
|
||||||
class ProximityInfo;
|
class ProximityInfo;
|
||||||
|
|
||||||
class CorrectionState {
|
class Correction {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -34,10 +34,10 @@ public:
|
||||||
UNRELATED,
|
UNRELATED,
|
||||||
ON_TERMINAL,
|
ON_TERMINAL,
|
||||||
NOT_ON_TERMINAL
|
NOT_ON_TERMINAL
|
||||||
} CorrectionStateType;
|
} CorrectionType;
|
||||||
|
|
||||||
CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier);
|
Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
|
||||||
void initCorrectionState(
|
void initCorrection(
|
||||||
const ProximityInfo *pi, const int inputLength, const int maxWordLength);
|
const ProximityInfo *pi, const int inputLength, const int maxWordLength);
|
||||||
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
|
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const int spaceProximityPos, const int missingSpacePos);
|
const int spaceProximityPos, const int missingSpacePos);
|
||||||
|
@ -50,7 +50,7 @@ public:
|
||||||
int getInputIndex();
|
int getInputIndex();
|
||||||
bool needsToTraverseAll();
|
bool needsToTraverseAll();
|
||||||
|
|
||||||
virtual ~CorrectionState();
|
virtual ~Correction();
|
||||||
int getSpaceProximityPos() const {
|
int getSpaceProximityPos() const {
|
||||||
return mSpaceProximityPos;
|
return mSpaceProximityPos;
|
||||||
}
|
}
|
||||||
|
@ -75,7 +75,7 @@ public:
|
||||||
int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
|
int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
|
||||||
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
||||||
|
|
||||||
CorrectionStateType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
||||||
|
|
||||||
int getDiffs() const {
|
int getDiffs() const {
|
||||||
return mDiffs;
|
return mDiffs;
|
||||||
|
@ -117,16 +117,16 @@ private:
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
|
||||||
inline bool isQuote(const unsigned short c);
|
inline bool isQuote(const unsigned short c);
|
||||||
inline CorrectionStateType processSkipChar(const int32_t c, const bool isTerminal);
|
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
|
||||||
|
|
||||||
class RankingAlgorithm {
|
class RankingAlgorithm {
|
||||||
public:
|
public:
|
||||||
static int calculateFinalFreq(const int inputIndex, const int depth,
|
static int calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int matchCount, const int freq, const bool sameLength,
|
const int matchCount, const int freq, const bool sameLength,
|
||||||
const CorrectionState* correctionState);
|
const Correction* correction);
|
||||||
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
||||||
const CorrectionState* correctionState);
|
const Correction* correction);
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_CORRECTION_INFO_H
|
#endif // LATINIME_CORRECTION_H
|
|
@ -23,7 +23,7 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class CorrectionState;
|
class Correction;
|
||||||
|
|
||||||
class ProximityInfo {
|
class ProximityInfo {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -48,11 +48,11 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("UnigramDictionary - constructor");
|
LOGI("UnigramDictionary - constructor");
|
||||||
}
|
}
|
||||||
mCorrectionState = new CorrectionState(typedLetterMultiplier, fullWordMultiplier);
|
mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
|
||||||
}
|
}
|
||||||
|
|
||||||
UnigramDictionary::~UnigramDictionary() {
|
UnigramDictionary::~UnigramDictionary() {
|
||||||
delete mCorrectionState;
|
delete mCorrection;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
|
static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
|
||||||
|
@ -184,7 +184,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
if (DEBUG_DICT) assert(codesSize == mInputLength);
|
if (DEBUG_DICT) assert(codesSize == mInputLength);
|
||||||
|
|
||||||
const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
mCorrectionState->initCorrectionState(mProximityInfo, mInputLength, maxDepth);
|
mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
|
||||||
PROF_END(0);
|
PROF_END(0);
|
||||||
|
|
||||||
PROF_START(1);
|
PROF_START(1);
|
||||||
|
@ -237,7 +237,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("--- Suggest missing space characters %d", i);
|
LOGI("--- Suggest missing space characters %d", i);
|
||||||
}
|
}
|
||||||
getMissingSpaceWords(mInputLength, i, mCorrectionState);
|
getMissingSpaceWords(mInputLength, i, mCorrection);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PROF_END(5);
|
PROF_END(5);
|
||||||
|
@ -256,7 +256,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
i, x, y, proximityInfo->hasSpaceProximity(x, y));
|
i, x, y, proximityInfo->hasSpaceProximity(x, y));
|
||||||
}
|
}
|
||||||
if (proximityInfo->hasSpaceProximity(x, y)) {
|
if (proximityInfo->hasSpaceProximity(x, y)) {
|
||||||
getMistypedSpaceWords(mInputLength, i, mCorrectionState);
|
getMistypedSpaceWords(mInputLength, i, mCorrection);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -347,7 +347,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
assert(excessivePos < mInputLength);
|
assert(excessivePos < mInputLength);
|
||||||
assert(missingPos < mInputLength);
|
assert(missingPos < mInputLength);
|
||||||
}
|
}
|
||||||
mCorrectionState->setCorrectionParams(skipPos, excessivePos, transposedPos,
|
mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos,
|
||||||
-1 /* spaceProximityPos */, -1 /* missingSpacePos */);
|
-1 /* spaceProximityPos */, -1 /* missingSpacePos */);
|
||||||
int rootPosition = ROOT_POS;
|
int rootPosition = ROOT_POS;
|
||||||
// Get the number of children of root, then increment the position
|
// Get the number of children of root, then increment the position
|
||||||
|
@ -368,13 +368,13 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
--mStackChildCount[depth];
|
--mStackChildCount[depth];
|
||||||
int siblingPos = mStackSiblingPos[depth];
|
int siblingPos = mStackSiblingPos[depth];
|
||||||
int firstChildPos;
|
int firstChildPos;
|
||||||
mCorrectionState->initProcessState(
|
mCorrection->initProcessState(
|
||||||
mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth],
|
mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth],
|
||||||
mStackTraverseAll[depth], mStackDiffs[depth]);
|
mStackTraverseAll[depth], mStackDiffs[depth]);
|
||||||
|
|
||||||
// needsToTraverseChildrenNodes should be false
|
// needsToTraverseChildrenNodes should be false
|
||||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
|
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
|
||||||
mCorrectionState, &childCount, &firstChildPos, &siblingPos);
|
mCorrection, &childCount, &firstChildPos, &siblingPos);
|
||||||
// Update next sibling pos
|
// Update next sibling pos
|
||||||
mStackSiblingPos[depth] = siblingPos;
|
mStackSiblingPos[depth] = siblingPos;
|
||||||
if (needsToTraverseChildrenNodes) {
|
if (needsToTraverseChildrenNodes) {
|
||||||
|
@ -383,7 +383,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
mStackChildCount[depth] = childCount;
|
mStackChildCount[depth] = childCount;
|
||||||
mStackSiblingPos[depth] = firstChildPos;
|
mStackSiblingPos[depth] = firstChildPos;
|
||||||
|
|
||||||
mCorrectionState->getProcessState(&mStackMatchedCount[depth],
|
mCorrection->getProcessState(&mStackMatchedCount[depth],
|
||||||
&mStackInputIndex[depth], &mStackOutputIndex[depth],
|
&mStackInputIndex[depth], &mStackOutputIndex[depth],
|
||||||
&mStackTraverseAll[depth], &mStackDiffs[depth]);
|
&mStackTraverseAll[depth], &mStackDiffs[depth]);
|
||||||
}
|
}
|
||||||
|
@ -409,17 +409,17 @@ inline static void multiplyIntCapped(const int multiplier, int *base) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getMissingSpaceWords(
|
void UnigramDictionary::getMissingSpaceWords(
|
||||||
const int inputLength, const int missingSpacePos, CorrectionState *correctionState) {
|
const int inputLength, const int missingSpacePos, Correction *correction) {
|
||||||
correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
||||||
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos);
|
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos);
|
||||||
getSplitTwoWordsSuggestion(inputLength, correctionState);
|
getSplitTwoWordsSuggestion(inputLength, correction);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getMistypedSpaceWords(
|
void UnigramDictionary::getMistypedSpaceWords(
|
||||||
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState) {
|
const int inputLength, const int spaceProximityPos, Correction *correction) {
|
||||||
correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
||||||
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */);
|
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */);
|
||||||
getSplitTwoWordsSuggestion(inputLength, correctionState);
|
getSplitTwoWordsSuggestion(inputLength, correction);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||||
|
@ -429,19 +429,19 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||||
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
|
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminal(const int freq, CorrectionState *correctionState) {
|
inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
|
||||||
int wordLength;
|
int wordLength;
|
||||||
unsigned short* wordPointer;
|
unsigned short* wordPointer;
|
||||||
const int finalFreq = correctionState->getFinalFreq(freq, &wordPointer, &wordLength);
|
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
|
||||||
if (finalFreq >= 0) {
|
if (finalFreq >= 0) {
|
||||||
addWord(wordPointer, wordLength, finalFreq);
|
addWord(wordPointer, wordLength, finalFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getSplitTwoWordsSuggestion(
|
void UnigramDictionary::getSplitTwoWordsSuggestion(
|
||||||
const int inputLength, CorrectionState* correctionState) {
|
const int inputLength, Correction* correction) {
|
||||||
const int spaceProximityPos = correctionState->getSpaceProximityPos();
|
const int spaceProximityPos = correction->getSpaceProximityPos();
|
||||||
const int missingSpacePos = correctionState->getMissingSpacePos();
|
const int missingSpacePos = correction->getMissingSpacePos();
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
int inputCount = 0;
|
int inputCount = 0;
|
||||||
if (spaceProximityPos >= 0) ++inputCount;
|
if (spaceProximityPos >= 0) ++inputCount;
|
||||||
|
@ -485,7 +485,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
|
||||||
word[i] = mWord[i - firstWordLength - 1];
|
word[i] = mWord[i - firstWordLength - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
const int pairFreq = mCorrectionState->getFreqForSplitTwoWords(firstFreq, secondFreq);
|
const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq);
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
|
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
|
||||||
}
|
}
|
||||||
|
@ -650,10 +650,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
|
||||||
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
|
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
|
||||||
// given level, as output into newCount when traversing this level's parent.
|
// given level, as output into newCount when traversing this level's parent.
|
||||||
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
CorrectionState *correctionState, int *newCount,
|
Correction *correction, int *newCount,
|
||||||
int *newChildrenPosition, int *nextSiblingPosition) {
|
int *newChildrenPosition, int *nextSiblingPosition) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
correctionState->checkState();
|
correction->checkState();
|
||||||
}
|
}
|
||||||
int pos = initialPos;
|
int pos = initialPos;
|
||||||
|
|
||||||
|
@ -697,12 +697,12 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
// If we are on the last char, this virtual node is a terminal if this node is.
|
// If we are on the last char, this virtual node is a terminal if this node is.
|
||||||
const bool isTerminal = isLastChar && isTerminalNode;
|
const bool isTerminal = isLastChar && isTerminalNode;
|
||||||
|
|
||||||
CorrectionState::CorrectionStateType stateType = correctionState->processCharAndCalcState(
|
Correction::CorrectionType stateType = correction->processCharAndCalcState(
|
||||||
c, isTerminal);
|
c, isTerminal);
|
||||||
if (stateType == CorrectionState::TRAVERSE_ALL_ON_TERMINAL
|
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|
||||||
|| stateType == CorrectionState::ON_TERMINAL) {
|
|| stateType == Correction::ON_TERMINAL) {
|
||||||
needsToInvokeOnTerminal = true;
|
needsToInvokeOnTerminal = true;
|
||||||
} else if (stateType == CorrectionState::UNRELATED) {
|
} else if (stateType == Correction::UNRELATED) {
|
||||||
// We found that this is an unrelated character, so we should give up traversing
|
// We found that this is an unrelated character, so we should give up traversing
|
||||||
// this node and its children entirely.
|
// this node and its children entirely.
|
||||||
// However we may not be on the last virtual node yet so we skip the remaining
|
// However we may not be on the last virtual node yet so we skip the remaining
|
||||||
|
@ -730,7 +730,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
// The frequency should be here, because we come here only if this is actually
|
// The frequency should be here, because we come here only if this is actually
|
||||||
// a terminal node, and we are on its last char.
|
// a terminal node, and we are on its last char.
|
||||||
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
|
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
|
||||||
onTerminal(freq, mCorrectionState);
|
onTerminal(freq, mCorrection);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there are more chars in this node, then this virtual node has children.
|
// If there are more chars in this node, then this virtual node has children.
|
||||||
|
@ -751,7 +751,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optimization: Prune out words that are too long compared to how much was typed.
|
// Optimization: Prune out words that are too long compared to how much was typed.
|
||||||
if (correctionState->needsToPrune()) {
|
if (correction->needsToPrune()) {
|
||||||
pos = BinaryFormat::skipFrequency(flags, pos);
|
pos = BinaryFormat::skipFrequency(flags, pos);
|
||||||
*nextSiblingPosition =
|
*nextSiblingPosition =
|
||||||
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
|
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#define LATINIME_UNIGRAM_DICTIONARY_H
|
#define LATINIME_UNIGRAM_DICTIONARY_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "correction_state.h"
|
#include "correction.h"
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "proximity_info.h"
|
#include "proximity_info.h"
|
||||||
|
|
||||||
|
@ -89,17 +89,17 @@ private:
|
||||||
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
||||||
const int transposedPos);
|
const int transposedPos);
|
||||||
bool addWord(unsigned short *word, int length, int frequency);
|
bool addWord(unsigned short *word, int length, int frequency);
|
||||||
void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState);
|
void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
|
||||||
void getMissingSpaceWords(
|
void getMissingSpaceWords(
|
||||||
const int inputLength, const int missingSpacePos, CorrectionState *correctionState);
|
const int inputLength, const int missingSpacePos, Correction *correction);
|
||||||
void getMistypedSpaceWords(
|
void getMistypedSpaceWords(
|
||||||
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState);
|
const int inputLength, const int spaceProximityPos, Correction *correction);
|
||||||
void onTerminal(const int freq, CorrectionState *correctionState);
|
void onTerminal(const int freq, Correction *correction);
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
// Process a node by considering proximity, missing and excessive character
|
// Process a node by considering proximity, missing and excessive character
|
||||||
bool processCurrentNode(const int initialPos,
|
bool processCurrentNode(const int initialPos,
|
||||||
CorrectionState *correctionState, int *newCount,
|
Correction *correction, int *newCount,
|
||||||
int *newChildPosition, int *nextSiblingPosition);
|
int *newChildPosition, int *nextSiblingPosition);
|
||||||
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
||||||
unsigned short *word);
|
unsigned short *word);
|
||||||
|
@ -129,7 +129,7 @@ private:
|
||||||
int *mFrequencies;
|
int *mFrequencies;
|
||||||
unsigned short *mOutputChars;
|
unsigned short *mOutputChars;
|
||||||
ProximityInfo *mProximityInfo;
|
ProximityInfo *mProximityInfo;
|
||||||
CorrectionState *mCorrectionState;
|
Correction *mCorrection;
|
||||||
int mInputLength;
|
int mInputLength;
|
||||||
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
|
Loading…
Reference in New Issue