Prepare for proximity + two word correction No2

Change-Id: Idfa1413e853299f1db459ef07da3efa932047981
main
satok 2011-12-16 23:15:06 +09:00
parent 42fcb2de64
commit 1a6da631ab
5 changed files with 64 additions and 21 deletions

View File

@ -38,8 +38,28 @@ inline static void initEditDistance(int *editDistanceTable) {
} }
} }
// Debug helper: logs the top-left 11 x 11 corner of the edit distance table,
// one LOGI line per table row. The table is read as consecutive rows of
// (inputLength + 1) ints. Cells that fall outside the
// (outputLength + 1) x (inputLength + 1) region are printed as -1 and the
// table is not read for them. Does nothing unless DEBUG_DICT is enabled.
inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
        const int outputLength) {
    if (!DEBUG_DICT) {
        return;
    }
    LOGI("EditDistanceTable");
    const int rowWidth = inputLength + 1;
    const int rowCount = outputLength + 1;
    for (int row = 0; row < 11; ++row) {
        int cells[11];
        const bool rowInRange = row < rowCount;
        for (int col = 0; col < 11; ++col) {
            // Only dereference the table when both indices are inside the valid region.
            const bool cellInRange = rowInRange && col < rowWidth;
            cells[col] = cellInRange ? editDistanceTable[row * rowWidth + col] : -1;
        }
        LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
                cells[0], cells[1], cells[2], cells[3], cells[4], cells[5], cells[6],
                cells[7], cells[8], cells[9], cells[10]);
    }
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputLength, const unsigned short *output, const int outputLength) { const int inputLength, const unsigned short *output, const int outputLength) {
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j]. // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated, // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
// and calculate dp[outputLength][0] ... dp[outputLength][inputLength]. // and calculate dp[outputLength][0] ... dp[outputLength][inputLength].
@ -62,6 +82,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance( inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) { int *editDistanceTable, const int inputLength, const int outputLength) {
if (DEBUG_DICT) {
LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
}
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
} }
@ -90,6 +113,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
mInputLength = inputLength; mInputLength = inputLength;
mMaxDepth = maxDepth; mMaxDepth = maxDepth;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
// TODO: This is not supposed to be required. Check what's going wrong with
// editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
initEditDistance(mEditDistanceTable);
} }
void Correction::initCorrectionState( void Correction::initCorrectionState(
@ -620,6 +646,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
// TODO: Calculate edit distance for transposed and excessive // TODO: Calculate edit distance for transposed and excessive
int ed = 0; int ed = 0;
if (DEBUG_DICT_FULL) {
dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
}
int adjustedProximityMatchedCount = proximityMatchedCount; int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq; int finalFreq = freq;

View File

@ -98,7 +98,7 @@ static void prof_out(void) {
#define DEBUG_SHOW_FOUND_WORD false #define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL #define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL #define DEBUG_TRACE DEBUG_DICT_FULL
#define DEBUG_PROXIMITY_INFO true #define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false #define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ true #define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true #define DEBUG_WORDS_PRIORITY_QUEUE true

View File

@ -103,7 +103,7 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) { if (x < 0 || y < 0) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y); LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
assert(true); assert(false);
} }
return false; return false;
} }

View File

@ -243,13 +243,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
} }
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
const int *yCoordinates, const int *codes, const int codesSize, const int *yCoordinates, const int *codes, const int inputLength,
WordsPriorityQueue *queue) { WordsPriorityQueue *queue, Correction *correction) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("initSuggest"); LOGI("initSuggest");
} }
proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
queue->clear(); if (queue) {
queue->clear();
}
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth);
} }
static const char QUOTE = '\''; static const char QUOTE = '\'';
@ -260,19 +264,19 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueuePool *queuePool) { WordsPriorityQueuePool *queuePool) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue); initSuggestions(
if (DEBUG_DICT) assert(codesSize == inputLength); proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
correction->initCorrection(proximityInfo, inputLength, maxDepth); true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue);
} }
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueue *queue) { const int inputLength, Correction *correction, WordsPriorityQueue *queue,
const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); doAutoCompletion, maxErrors);
int rootPosition = ROOT_POS; int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position // Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
@ -306,9 +310,6 @@ void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction, const int inputLength, const int missingSpacePos, Correction *correction,
WordsPriorityQueuePool* queuePool) { WordsPriorityQueuePool* queuePool) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */, useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
correction, queuePool); correction, queuePool);
@ -318,9 +319,6 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction, const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool) { WordsPriorityQueuePool* queuePool) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos, useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
correction, queuePool); correction, queuePool);
@ -362,6 +360,15 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
return; return;
const int newWordLength = firstWordLength + secondWordLength + 1; const int newWordLength = firstWordLength + secondWordLength + 1;
// Space proximity preparation
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
//correction);
//getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
//MAX_ERRORS_FOR_TWO_WORDS);
// Allocating variable length array on stack // Allocating variable length array on stack
unsigned short word[newWordLength]; unsigned short word[newWordLength];
const int firstFreq = getMostFrequentWordLike( const int firstFreq = getMostFrequentWordLike(
@ -387,6 +394,13 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
word[i] = mWord[i - firstWordLength - 1]; word[i] = mWord[i - firstWordLength - 1];
} }
// TODO: Remove initSuggestions and correction->setCorrectionParams
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
0 /* do not clear queue */, correction);
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);

View File

@ -90,13 +90,13 @@ private:
WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
WordsPriorityQueue *queue); WordsPriorityQueue *queue, Correction *correction);
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates( void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueue* queue); WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,