Do other error correction for the second word of two word correction

result: I4e0b68a12190933f9

Change-Id: I98afce6fe4d5bde97392146d204370ba31a72566
main
satok 2012-01-23 16:52:37 +09:00
parent f9521c6f37
commit 8330b488e9
7 changed files with 181 additions and 101 deletions

View File

@ -269,7 +269,7 @@ bool Correction::needsToPrune() const {
// TODO: use edit distance here // TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
// Allow one char longer word for missing character // Allow one char longer word for missing character
|| (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength)); || (!mDoAutoCompletion && (mOutputIndex > mInputLength));
} }
void Correction::addCharToCurrentWord(const int32_t c) { void Correction::addCharToCurrentWord(const int32_t c) {
@ -555,55 +555,6 @@ Correction::CorrectionType Correction::processCharAndCalcState(
Correction::~Correction() { Correction::~Correction() {
} }
/////////////////////////
// static inline utils //
/////////////////////////
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
// Returns 255 * num, saturating at S_INT_MAX: when num is at or above TWO_31ST_DIV_255 the
// product is not computed and S_INT_MAX is returned instead.
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
    if (num >= TWO_31ST_DIV_255) {
        return S_INT_MAX;
    }
    return num * 255;
}
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
// Multiplies *base by multiplier in place, saturating at S_INT_MAX.
//   multiplier: the factor to apply.
//   base: in/out value; left untouched when it already equals S_INT_MAX.
// A result that would be smaller than the starting value (for example a multiplier <= 0 applied
// to a positive base) is reported as S_INT_MAX, exactly as the previous implementation did.
inline static void multiplyIntCapped(const int multiplier, int *base) {
    const int temp = *base;
    if (temp == S_INT_MAX) {
        // Already saturated; keep the cap.
        return;
    }
    // Branch if multiplier == 2 for the optimization
    if (multiplier == 2) {
        *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
        return;
    }
    // Compute the product in 64 bits. Multiplying in int overflows (undefined behaviour) for
    // large operands, and the old "result >= temp" wrap check missed cases such as
    // temp = 2^16 + 1 and multiplier = 2^17 + 1.
    // NOTE(review): assumes S_INT_MAX is the largest int value -- confirm against its definition.
    const long long product = static_cast<long long>(temp) * multiplier;
    const bool fitsAndDidNotShrink = product >= temp && product <= S_INT_MAX;
    *base = fitsAndDidNotShrink ? static_cast<int>(product) : S_INT_MAX;
}
// Returns base raised to the n-th power, relying on multiplyIntCapped() for the repeated
// multiplication. Any n <= 0 yields 1. Powers of two are produced with a shift, and
// S_INT_MAX is returned for base == 2 once n reaches 31.
inline static int powerIntCapped(const int base, const int n) {
    if (n <= 0) {
        return 1;
    }
    if (base != 2) {
        int result = base;
        // result already holds base^1, so n - 1 further multiplications remain.
        int remaining = n - 1;
        while (remaining > 0) {
            multiplyIntCapped(base, &result);
            --remaining;
        }
        return result;
    }
    if (n >= 31) {
        return S_INT_MAX;
    }
    return 1 << n;
}
// Scales *freq in place by rate / 100 using integer arithmetic.
// A value equal to S_INT_MAX is left unchanged. For values above 1000000 the division by 100
// happens before the multiplication; otherwise the multiplication happens first.
inline static void multiplyRate(const int rate, int *freq) {
    if (*freq == S_INT_MAX) {
        return;
    }
    const bool divideFirst = *freq > 1000000;
    if (divideFirst) {
        *freq /= 100;
    }
    multiplyIntCapped(rate, freq);
    if (!divideFirst) {
        *freq /= 100;
    }
}
inline static int getQuoteCount(const unsigned short* word, const int length) { inline static int getQuoteCount(const unsigned short* word, const int length) {
int quoteCount = 0; int quoteCount = 0;
for (int i = 0; i < length; ++i) { for (int i = 0; i < length; ++i) {
@ -939,7 +890,11 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
} }
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); if (isSpaceProximity) {
multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq);
} else {
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
}
if (capitalizedWordDemotion) { if (capitalizedWordDemotion) {
multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);

View File

@ -36,6 +36,55 @@ class Correction {
NOT_ON_TERMINAL NOT_ON_TERMINAL
} CorrectionType; } CorrectionType;
/////////////////////////
// static inline utils //
/////////////////////////
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
// Returns 255 * num, saturating at S_INT_MAX: when num is at or above TWO_31ST_DIV_255 the
// product is not computed and S_INT_MAX is returned instead.
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
    if (num >= TWO_31ST_DIV_255) {
        return S_INT_MAX;
    }
    return num * 255;
}
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
// Multiplies *base by multiplier in place, saturating at S_INT_MAX.
//   multiplier: the factor to apply.
//   base: in/out value; left untouched when it already equals S_INT_MAX.
// A result that would be smaller than the starting value (for example a multiplier <= 0 applied
// to a positive base) is reported as S_INT_MAX, exactly as the previous implementation did.
inline static void multiplyIntCapped(const int multiplier, int *base) {
    const int temp = *base;
    if (temp == S_INT_MAX) {
        // Already saturated; keep the cap.
        return;
    }
    // Branch if multiplier == 2 for the optimization
    if (multiplier == 2) {
        *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
        return;
    }
    // Compute the product in 64 bits. Multiplying in int overflows (undefined behaviour) for
    // large operands, and the old "result >= temp" wrap check missed cases such as
    // temp = 2^16 + 1 and multiplier = 2^17 + 1.
    // NOTE(review): assumes S_INT_MAX is the largest int value -- confirm against its definition.
    const long long product = static_cast<long long>(temp) * multiplier;
    const bool fitsAndDidNotShrink = product >= temp && product <= S_INT_MAX;
    *base = fitsAndDidNotShrink ? static_cast<int>(product) : S_INT_MAX;
}
// Returns base raised to the n-th power, relying on multiplyIntCapped() for the repeated
// multiplication. Any n <= 0 yields 1. Powers of two are produced with a shift, and
// S_INT_MAX is returned for base == 2 once n reaches 31.
inline static int powerIntCapped(const int base, const int n) {
    if (n <= 0) {
        return 1;
    }
    if (base != 2) {
        int result = base;
        // result already holds base^1, so n - 1 further multiplications remain.
        int remaining = n - 1;
        while (remaining > 0) {
            multiplyIntCapped(base, &result);
            --remaining;
        }
        return result;
    }
    if (n >= 31) {
        return S_INT_MAX;
    }
    return 1 << n;
}
// Scales *freq in place by rate / 100 using integer arithmetic.
// A value equal to S_INT_MAX is left unchanged. For values above 1000000 the division by 100
// happens before the multiplication; otherwise the multiplication happens first.
inline static void multiplyRate(const int rate, int *freq) {
    if (*freq == S_INT_MAX) {
        return;
    }
    const bool divideFirst = *freq > 1000000;
    if (divideFirst) {
        *freq /= 100;
    }
    multiplyIntCapped(rate, freq);
    if (!divideFirst) {
        *freq /= 100;
    }
}
Correction(const int typedLetterMultiplier, const int fullWordMultiplier); Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
void initCorrection( void initCorrection(
const ProximityInfo *pi, const int inputLength, const int maxWordLength); const ProximityInfo *pi, const int inputLength, const int maxWordLength);

View File

@ -188,6 +188,7 @@ static void prof_out(void) {
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58
#define WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE 50
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
@ -221,6 +222,9 @@ static void prof_out(void) {
#define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3
#define FIRST_WORD_INDEX 1
#define SECOND_WORD_INDEX 2
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
// word in the dictionary // word in the dictionary
#define DEFAULT_MAX_UMLAUT_SEARCH_DEPTH 5 #define DEFAULT_MAX_UMLAUT_SEARCH_DEPTH 5

View File

@ -159,19 +159,26 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
} }
PROF_START(20); PROF_START(20);
if (DEBUG_DICT) {
double ns = queuePool->getMasterQueue()->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0);
ns += 0;
AKLOGI("Max normalized score = %f", ns);
}
const int suggestedWordsCount = const int suggestedWordsCount =
queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords); queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords);
if (DEBUG_DICT) { if (DEBUG_DICT) {
double ns = queuePool->getMasterQueue()->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0);
ns += 0;
AKLOGI("Returning %d words", suggestedWordsCount); AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words /// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) { for (int j = 0; j < suggestedWordsCount; ++j) {
#ifdef FLAG_DBG
short unsigned int* w = outWords + j * MAX_WORD_LENGTH; short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH]; char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
AKLOGI("%s %i", s, frequencies[j]); AKLOGI("%s %i", s, frequencies[j]);
#endif
} }
} }
PROF_END(20); PROF_END(20);
@ -205,6 +212,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(4); PROF_START(4);
// Note: This line is intentionally left blank // Note: This line is intentionally left blank
bool hasAutoCorrectionCandidate = false;
WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
if (masterQueue->size() > 0) {
double nsForMaster = masterQueue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD);
}
PROF_END(4); PROF_END(4);
PROF_START(5); PROF_START(5);
@ -216,7 +230,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
AKLOGI("--- Suggest missing space characters %d", i); AKLOGI("--- Suggest missing space characters %d", i);
} }
getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes, getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, i, correction, queuePool); useFullEditDistance, inputLength, i, correction, queuePool,
hasAutoCorrectionCandidate);
} }
} }
PROF_END(5); PROF_END(5);
@ -236,7 +251,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
} }
if (proximityInfo->hasSpaceProximity(x, y)) { if (proximityInfo->hasSpaceProximity(x, y)) {
getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes, getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, i, correction, queuePool); useFullEditDistance, inputLength, i, correction, queuePool,
hasAutoCorrectionCandidate);
} }
} }
} }
@ -281,12 +297,12 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
WordsPriorityQueuePool *queuePool) { WordsPriorityQueuePool *queuePool) {
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool, getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
} }
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors) { const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
// TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
@ -305,7 +321,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
int firstChildPos; int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
correction, &childCount, &firstChildPos, &siblingPos, queuePool); correction, &childCount, &firstChildPos, &siblingPos, queuePool,
currentWordIndex);
// Update next sibling pos // Update next sibling pos
correction->setTreeSiblingPos(outputIndex, siblingPos); correction->setTreeSiblingPos(outputIndex, siblingPos);
@ -323,31 +340,32 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction, const int inputLength, const int missingSpacePos, Correction *correction,
WordsPriorityQueuePool* queuePool) { WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */, useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
correction, queuePool); correction, queuePool, hasAutoCorrectionCandidate);
} }
void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction, const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool) { WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos, useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
correction, queuePool); correction, queuePool, hasAutoCorrectionCandidate);
} }
inline void UnigramDictionary::onTerminal(const int freq, inline void UnigramDictionary::onTerminal(const int freq,
const TerminalAttributes& terminalAttributes, Correction *correction, const TerminalAttributes& terminalAttributes, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex) {
const int inputIndex = correction->getInputIndex(); const int inputIndex = correction->getInputIndex();
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
int wordLength; int wordLength;
unsigned short* wordPointer; unsigned short* wordPointer;
if (addToMasterQueue) { if ((currentWordIndex == 1) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq != NOT_A_FREQUENCY) { if (finalFreq != NOT_A_FREQUENCY) {
@ -376,9 +394,14 @@ inline void UnigramDictionary::onTerminal(const int freq,
// We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
// or more length. // or more length.
if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
// TODO: Check the validity of "inputIndex == wordLength" WordsPriorityQueue *subQueue;
//if (addToSubQueue && inputIndex == wordLength) { if (currentWordIndex == 1) {
WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); subQueue = queuePool->getSubQueue1(inputIndex);
} else if (currentWordIndex == 2) {
subQueue = queuePool->getSubQueue2(inputIndex);
} else {
return;
}
const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
inputIndex); inputIndex);
addWord(wordPointer, wordLength, finalFreq, subQueue); addWord(wordPointer, wordLength, finalFreq, subQueue);
@ -388,17 +411,21 @@ inline void UnigramDictionary::onTerminal(const int freq,
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int missingSpacePos, const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate) {
if (inputLength >= MAX_WORD_LENGTH) return; if (inputLength >= MAX_WORD_LENGTH) return;
if (DEBUG_DICT) { if (DEBUG_DICT) {
int inputCount = 0; int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount; if (spaceProximityPos >= 0) ++inputCount;
if (missingSpacePos >= 0) ++inputCount; if (missingSpacePos >= 0) ++inputCount;
assert(inputCount <= 1); assert(inputCount <= 1);
// MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
assert(MAX_PROXIMITY_CHARS == 16);
} }
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputLength, correction);
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
const bool isSpaceProximity = spaceProximityPos >= 0; const bool isSpaceProximity = spaceProximityPos >= 0;
// First word // First word
@ -411,26 +438,22 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
if (firstFreq > 0) { if (firstFreq > 0) {
firstOutputWordLength = firstInputWordLength; firstOutputWordLength = firstInputWordLength;
firstOutputWord = mWord; firstOutputWord = mWord;
} else { } else if (!hasAutoCorrectionCandidate) {
if (masterQueue->size() > 0) {
double nsForMaster = masterQueue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) {
// Do nothing if the highest suggestion exceeds the threshold.
return;
}
}
WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength);
if (firstWordQueue->size() < 1) { if (!firstWordQueue || firstWordQueue->size() < 1) {
return; return;
} }
int score = 0; int score = 0;
const double ns = firstWordQueue->getHighestNormalizedScore( const double ns = firstWordQueue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), firstInputWordLength, proximityInfo->getPrimaryInputWord(), firstInputWordLength,
&firstOutputWord, &score, &firstOutputWordLength); &firstOutputWord, &score, &firstOutputWordLength);
if (DEBUG_DICT) {
AKLOGI("NS1 = %f, Score = %d", ns, score);
}
// Two words correction won't be done if the score of the first word doesn't exceed the // Two words correction won't be done if the score of the first word doesn't exceed the
// threshold. // threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) { if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|| firstOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
return; return;
} }
firstFreq = score >> (firstOutputWordLength firstFreq = score >> (firstOutputWordLength
@ -456,14 +479,6 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
outputWord[firstOutputWordLength] = SPACE; outputWord[firstOutputWordLength] = SPACE;
outputWordLength = firstOutputWordLength + 1; outputWordLength = firstOutputWordLength + 1;
//const int outputWordLength = firstOutputWordLength + secondWordLength + 1;
// Space proximity preparation
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstOutputWordLength,
//subQueue, correction);
//getSuggestionCandidates(useFullEditDistance, firstOutputWordLength, correction, subQueue,
//false, MAX_ERRORS_FOR_TWO_WORDS);
// Second word // Second word
const int secondInputWordLength = isSpaceProximity const int secondInputWordLength = isSpaceProximity
? (inputLength - spaceProximityPos - 1) ? (inputLength - spaceProximityPos - 1)
@ -478,9 +493,42 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
if (secondFreq > 0) { if (secondFreq > 0) {
secondOutputWordLength = secondInputWordLength; secondOutputWordLength = secondInputWordLength;
secondOutputWord = mWord; secondOutputWord = mWord;
} else if (!hasAutoCorrectionCandidate) {
const int offset = secondInputWordStartPos;
initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction);
queuePool->clearSubQueue2();
getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction,
queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX);
if (DEBUG_DICT) {
AKLOGI("Dump second word candidates %d", secondInputWordLength);
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
queuePool->getSubQueue2(i)->dumpTopWord();
}
}
WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength);
if (!secondWordQueue || secondWordQueue->size() < 1) {
return;
}
int score = 0;
const double ns = secondWordQueue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), secondInputWordLength,
&secondOutputWord, &score, &secondOutputWordLength);
if (DEBUG_DICT) {
AKLOGI("NS2 = %f, Score = %d", ns, score);
}
// Two words correction won't be done if the score of the second word doesn't exceed the
// threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|| secondOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
return;
}
secondFreq = score >> (secondOutputWordLength
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
} }
if (DEBUG_DICT) { if (DEBUG_DICT) {
DUMP_WORD(secondOutputWord, secondOutputWordLength);
AKLOGI("Second freq: %d", secondFreq); AKLOGI("Second freq: %d", secondFreq);
} }
@ -742,7 +790,8 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent. // given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos, inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount, Correction *correction, int *newCount,
int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) { int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
correction->checkState(); correction->checkState();
} }
@ -823,7 +872,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal); onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
currentWordIndex);
// If there are more chars in this node, then this virtual node has children. // If there are more chars in this node, then this virtual node has children.
// If we are on the last char, this virtual node has children if this node has. // If we are on the last char, this virtual node has children if this node has.

View File

@ -99,11 +99,13 @@ class UnigramDictionary {
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates( void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors); WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors,
const int currentWordIndex);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool); const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate);
void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo, void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@ -111,18 +113,20 @@ class UnigramDictionary {
void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction, const int inputLength, const int missingSpacePos, Correction *correction,
WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction, const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue); Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool); int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
ProximityInfo *proximityInfo, unsigned short *word); ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,

View File

@ -137,7 +137,7 @@ class WordsPriorityQueue {
if (size() <= 0) { if (size() <= 0) {
return; return;
} }
DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength); DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
} }
double getHighestNormalizedScore(const unsigned short* before, const int beforeLength, double getHighestNormalizedScore(const unsigned short* before, const int beforeLength,

View File

@ -45,15 +45,21 @@ class WordsPriorityQueuePool {
// TODO: Come up with more generic pool // TODO: Come up with more generic pool
WordsPriorityQueue* getSubQueue1(const int id) { WordsPriorityQueue* getSubQueue1(const int id) {
if (DEBUG_WORDS_PRIORITY_QUEUE) { if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) {
assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT); if (DEBUG_WORDS_PRIORITY_QUEUE) {
assert(false);
}
return 0;
} }
return mSubQueues1[id]; return mSubQueues1[id];
} }
WordsPriorityQueue* getSubQueue2(const int id) { WordsPriorityQueue* getSubQueue2(const int id) {
if (DEBUG_WORDS_PRIORITY_QUEUE) { if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) {
assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT); if (DEBUG_WORDS_PRIORITY_QUEUE) {
assert(false);
}
return 0;
} }
return mSubQueues2[id]; return mSubQueues2[id];
} }
@ -66,6 +72,18 @@ class WordsPriorityQueuePool {
} }
} }
// Calls clear() on each of the SUB_QUEUE_MAX_COUNT queues held in mSubQueues1.
inline void clearSubQueue1() {
    int index = 0;
    while (index < SUB_QUEUE_MAX_COUNT) {
        mSubQueues1[index]->clear();
        ++index;
    }
}
// Calls clear() on each of the SUB_QUEUE_MAX_COUNT queues held in mSubQueues2.
inline void clearSubQueue2() {
    int index = 0;
    while (index < SUB_QUEUE_MAX_COUNT) {
        mSubQueues2[index]->clear();
        ++index;
    }
}
void dumpSubQueue1TopSuggestions() { void dumpSubQueue1TopSuggestions() {
AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS"); AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {