Clean up two word correction

Change-Id: I5cd2697d7f61b81aff0c249df01479d86ad0fba5
main
satok 2012-01-23 12:30:20 +09:00
parent 530f5d7e15
commit bd6ccdd5f0
1 changed files with 67 additions and 51 deletions

View File

@ -389,24 +389,28 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int missingSpacePos, const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); if (inputLength >= MAX_WORD_LENGTH) return;
if (DEBUG_DICT) { if (DEBUG_DICT) {
int inputCount = 0; int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount; if (spaceProximityPos >= 0) ++inputCount;
if (missingSpacePos >= 0) ++inputCount; if (missingSpacePos >= 0) ++inputCount;
assert(inputCount <= 1); assert(inputCount <= 1);
} }
const bool isSpaceProximity = spaceProximityPos >= 0;
const int firstWordStartPos = 0;
const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord);
unsigned short* firstWord = 0; const bool isSpaceProximity = spaceProximityPos >= 0;
int firstWordLength = 0;
// First word
const int firstInputWordStartPos = 0;
const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
int firstFreq = getMostFrequentWordLike(
firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord);
unsigned short* firstOutputWord = 0;
int firstOutputWordLength = 0;
if (firstFreq > 0) { if (firstFreq > 0) {
firstWordLength = firstTypedWordLength; firstOutputWordLength = firstInputWordLength;
firstWord = mWord; firstOutputWord = mWord;
} else { } else {
if (masterQueue->size() > 0) { if (masterQueue->size() > 0) {
double nsForMaster = masterQueue->getHighestNormalizedScore( double nsForMaster = masterQueue->getHighestNormalizedScore(
@ -416,80 +420,92 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
return; return;
} }
} }
WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength); WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength);
if (firstWordQueue->size() < 1) { if (firstWordQueue->size() < 1) {
return; return;
} }
int score = 0; int score = 0;
const double ns = firstWordQueue->getHighestNormalizedScore( const double ns = firstWordQueue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score, proximityInfo->getPrimaryInputWord(), firstInputWordLength,
&firstWordLength); &firstOutputWord, &score, &firstOutputWordLength);
// Two words correction won't be done if the score of the first word doesn't exceed the // Two words correction won't be done if the score of the first word doesn't exceed the
// threshold. // threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) { if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) {
return; return;
} }
firstFreq = score >> (firstWordLength firstFreq = score >> (firstOutputWordLength
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
} }
if (firstFreq <= 0) {
return;
}
const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
const int secondWordLength = isSpaceProximity
? (inputLength - spaceProximityPos - 1)
: (inputLength - missingSpacePos);
if (inputLength >= MAX_WORD_LENGTH) return;
if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
return;
const int newWordLength = firstWordLength + secondWordLength + 1;
// Space proximity preparation
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
//correction);
//getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
//MAX_ERRORS_FOR_TWO_WORDS);
// Allocating variable length array on stack
unsigned short word[newWordLength];
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("First freq: %d", firstFreq); AKLOGI("First freq: %d", firstFreq);
} }
for (int i = 0; i < firstWordLength; ++i) { if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) {
word[i] = firstWord[i]; return;
}
// Allocating fixed length array on stack
unsigned short outputWord[MAX_WORD_LENGTH];
int outputWordLength = 0;
for (int i = 0; i < firstOutputWordLength; ++i) {
outputWord[i] = firstOutputWord[i];
}
outputWord[firstOutputWordLength] = SPACE;
outputWordLength = firstOutputWordLength + 1;
//const int outputWordLength = firstOutputWordLength + secondWordLength + 1;
// Space proximity preparation
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstOutputWordLength,
//subQueue, correction);
//getSuggestionCandidates(useFullEditDistance, firstOutputWordLength, correction, subQueue,
//false, MAX_ERRORS_FOR_TWO_WORDS);
// Second word
const int secondInputWordLength = isSpaceProximity
? (inputLength - spaceProximityPos - 1)
: (inputLength - missingSpacePos);
const int secondInputWordStartPos =
isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
int secondFreq = getMostFrequentWordLike(
secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord);
unsigned short* secondOutputWord = 0;
int secondOutputWordLength = 0;
if (secondFreq > 0) {
secondOutputWordLength = secondInputWordLength;
secondOutputWord = mWord;
} }
const int secondFreq = getMostFrequentWordLike(
secondWordStartPos, secondWordLength, proximityInfo, mWord);
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("Second freq: %d", secondFreq); AKLOGI("Second freq: %d", secondFreq);
} }
if (secondFreq <= 0) return;
word[firstWordLength] = SPACE; if (secondFreq <= 0 || secondOutputWordLength <= 0
for (int i = (firstWordLength + 1); i < newWordLength; ++i) { || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) {
word[i] = mWord[i - firstWordLength - 1]; return;
} }
for (int i = 0; i < secondOutputWordLength; ++i) {
outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i];
}
outputWordLength += secondOutputWordLength;
// TODO: Remove initSuggestions and correction->setCorrectionParams // TODO: Remove initSuggestions and correction->setCorrectionParams
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos, -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
} }
addWord(word, newWordLength, pairFreq, masterQueue); addWord(outputWord, outputWordLength, pairFreq, masterQueue);
return; return;
} }