Removed matchedChar count

Change-Id: I69e92026f802635f900b1e72d089afe4bda5fb0b
This commit is contained in:
satok 2011-08-11 21:25:39 +09:00
parent c122cfc8fd
commit 466ed22fc6
3 changed files with 28 additions and 49 deletions

View file

@ -102,7 +102,7 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2) const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
: (mInputLength == inputIndex + 1); : (mInputLength == inputIndex + 1);
return Correction::RankingAlgorithm::calculateFinalFreq( return Correction::RankingAlgorithm::calculateFinalFreq(
inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this); inputIndex, outputIndex, freq, sameLength, this);
} }
bool Correction::initProcessState(const int outputIndex) { bool Correction::initProcessState(const int outputIndex) {
@ -111,10 +111,9 @@ bool Correction::initProcessState(const int outputIndex) {
} }
mOutputIndex = outputIndex; mOutputIndex = outputIndex;
--(mCorrectionStates[outputIndex].mChildCount); --(mCorrectionStates[outputIndex].mChildCount);
mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount;
mInputIndex = mCorrectionStates[outputIndex].mInputIndex; mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes; mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
mDiffs = mCorrectionStates[outputIndex].mDiffs; mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount; mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
mSkipPos = mCorrectionStates[outputIndex].mSkipPos; mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
mSkipping = false; mSkipping = false;
@ -130,10 +129,6 @@ int Correction::goDownTree(
return mOutputIndex; return mOutputIndex;
} }
void Correction::charMatched() {
++mMatchedCharCount;
}
// TODO: remove // TODO: remove
int Correction::getOutputIndex() { int Correction::getOutputIndex() {
return mOutputIndex; return mOutputIndex;
@ -158,10 +153,9 @@ void Correction::incrementOutputIndex() {
mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount;
mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
mCorrectionStates[mOutputIndex].mDiffs = mDiffs; mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
mCorrectionStates[mOutputIndex].mSkipping = mSkipping; mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
@ -174,7 +168,7 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const { bool Correction::needsToPrune() const {
return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth) return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
|| mDiffs > mMaxEditDistance); || mProximityCount > mMaxEditDistance);
} }
Correction::CorrectionType Correction::processSkipChar( Correction::CorrectionType Correction::processSkipChar(
@ -231,8 +225,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
int matchedProximityCharId = mProximityInfo->getMatchedProximityId( int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
inputIndexForProximity, c, checkProximityChars); inputIndexForProximity, c, checkProximityChars);
const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId; if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
if (unrelated) {
if (skip) { if (skip) {
// Skip this letter and continue deeper // Skip this letter and continue deeper
++mSkippedCount; ++mSkippedCount;
@ -240,19 +233,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else { } else {
return UNRELATED; return UNRELATED;
} }
} else if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
// If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity.
mMatching = true;
} else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
incrementProximityCount();
} }
mWord[mOutputIndex] = c; mWord[mOutputIndex] = c;
// If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity.
if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
mMatching = true;
charMatched();
}
if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
incrementDiffs();
}
const bool isSameAsUserTypedLength = mInputLength const bool isSameAsUserTypedLength = mInputLength
== getInputIndex() + 1 == getInputIndex() + 1
@ -336,24 +325,25 @@ inline static void multiplyRate(const int rate, int *freq) {
////////////////////// //////////////////////
/* static */ /* static */
int Correction::RankingAlgorithm::calculateFinalFreq( int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
const int inputIndex, const int outputIndex, const int freq, const bool sameLength, const Correction* correction) {
const int matchCount, const int freq, const bool sameLength,
const Correction* correction) {
const int skipPos = correction->getSkipPos();
const int excessivePos = correction->getExcessivePos(); const int excessivePos = correction->getExcessivePos();
const int transposedPos = correction->getTransposedPos(); const int transposedPos = correction->getTransposedPos();
const int inputLength = correction->mInputLength; const int inputLength = correction->mInputLength;
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correction->mProximityInfo; const ProximityInfo *proximityInfo = correction->mProximityInfo;
// TODO: use mExcessiveCount
const int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
const unsigned short* word = correction->mWord; const unsigned short* word = correction->mWord;
const int skippedCount = correction->mSkippedCount; const bool skipped = correction->mSkippedCount > 0;
// TODO: Demote by edit distance // TODO: Demote by edit distance
int finalFreq = freq * matchWeight; int finalFreq = freq * matchWeight;
if (skipPos >= 0) { if (skipped) {
if (inputLength >= 2) { if (inputLength >= 2) {
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
* (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
@ -387,10 +377,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
} }
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
} }
if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) { if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
} }
} else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0 } else if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0
&& outputIndex > 0) { && outputIndex > 0) {
// A word with proximity corrections // A word with proximity corrections
if (DEBUG_DICT) { if (DEBUG_DICT) {
@ -418,7 +408,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
s ... skipping s ... skipping
a ... traversing all a ... traversing all
*/ */
if (matchCount == inputLength && matchCount >= 2 && skippedCount == 0 if (matchCount == inputLength && matchCount >= 2 && !skipped
&& word[matchCount] == word[matchCount - 1]) { && word[matchCount] == word[matchCount - 1]) {
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
} }

View file

@ -48,8 +48,6 @@ public:
void checkState(); void checkState();
bool initProcessState(const int index); bool initProcessState(const int index);
void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
bool *traverseAllNodes, int *diffs);
int getOutputIndex(); int getOutputIndex();
int getInputIndex(); int getInputIndex();
@ -80,10 +78,6 @@ public:
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
int getDiffs() const {
return mDiffs;
}
///////////////////////// /////////////////////////
// Tree helper methods // Tree helper methods
int goDownTree(const int parentIndex, const int childCount, const int firstChildPos); int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
@ -100,7 +94,6 @@ public:
return mCorrectionStates[index].mParentIndex; return mCorrectionStates[index].mParentIndex;
} }
private: private:
inline void charMatched();
inline void incrementInputIndex(); inline void incrementInputIndex();
inline void incrementOutputIndex(); inline void incrementOutputIndex();
inline bool needsToTraverseAllNodes(); inline bool needsToTraverseAllNodes();
@ -109,8 +102,8 @@ private:
inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal); inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
// TODO: remove // TODO: remove
inline void incrementDiffs() { inline void incrementProximityCount() {
++mDiffs; ++mProximityCount;
} }
const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER;
@ -133,8 +126,7 @@ private:
// The following member variables are being used as cache values of the correction state. // The following member variables are being used as cache values of the correction state.
int mOutputIndex; int mOutputIndex;
int mInputIndex; int mInputIndex;
int mDiffs; int mProximityCount;
int mMatchedCharCount;
int mSkippedCount; int mSkippedCount;
int mSkipPos; int mSkipPos;
bool mNeedsToTraverseAllNodes; bool mNeedsToTraverseAllNodes;
@ -144,8 +136,7 @@ private:
class RankingAlgorithm { class RankingAlgorithm {
public: public:
static int calculateFinalFreq(const int inputIndex, const int depth, static int calculateFinalFreq(const int inputIndex, const int depth,
const int matchCount, const int freq, const bool sameLength, const int freq, const bool sameLength, const Correction* correction);
const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
const Correction* correction); const Correction* correction);
}; };

View file

@ -28,8 +28,7 @@ struct CorrectionState {
int mSiblingPos; int mSiblingPos;
uint16_t mChildCount; uint16_t mChildCount;
uint8_t mInputIndex; uint8_t mInputIndex;
uint8_t mDiffs; uint8_t mProximityCount;
uint8_t mMatchedCount;
uint8_t mSkippedCount; uint8_t mSkippedCount;
int8_t mSkipPos; // should be signed int8_t mSkipPos; // should be signed
bool mMatching; bool mMatching;
@ -43,9 +42,8 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
state->mParentIndex = -1; state->mParentIndex = -1;
state->mChildCount = childCount; state->mChildCount = childCount;
state->mInputIndex = 0; state->mInputIndex = 0;
state->mDiffs = 0; state->mProximityCount = 0;
state->mSiblingPos = rootPos; state->mSiblingPos = rootPos;
state->mMatchedCount = 0;
state->mSkippedCount = 0; state->mSkippedCount = 0;
state->mMatching = false; state->mMatching = false;
state->mSkipping = false; state->mSkipping = false;