Prepare for proximity + two word correction No2

Change-Id: Idfa1413e853299f1db459ef07da3efa932047981
main
satok 2011-12-16 23:15:06 +09:00
parent 42fcb2de64
commit 1a6da631ab
5 changed files with 64 additions and 21 deletions

View File

@ -38,8 +38,28 @@ inline static void initEditDistance(int *editDistanceTable) {
}
}
inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
const int outputLength) {
if (DEBUG_DICT) {
LOGI("EditDistanceTable");
for (int i = 0; i <= 10; ++i) {
int c[11];
for (int j = 0; j <= 10; ++j) {
if (j < inputLength + 1 && i < outputLength + 1) {
c[j] = (editDistanceTable + i * (inputLength + 1))[j];
} else {
c[j] = -1;
}
}
LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputLength, const unsigned short *output, const int outputLength) {
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
// and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
@ -62,6 +82,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
if (DEBUG_DICT) {
LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
}
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
}
@ -90,6 +113,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
mInputLength = inputLength;
mMaxDepth = maxDepth;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
// TODO: This is not supposed to be required. Check what's going wrong with
// editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
initEditDistance(mEditDistanceTable);
}
void Correction::initCorrectionState(
@ -620,6 +646,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
if (DEBUG_DICT_FULL) {
dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
}
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;

View File

@ -98,7 +98,7 @@ static void prof_out(void) {
#define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL
#define DEBUG_PROXIMITY_INFO true
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true

View File

@ -103,7 +103,7 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) {
if (DEBUG_DICT) {
LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
assert(true);
assert(false);
}
return false;
}

View File

@ -243,13 +243,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
const int *yCoordinates, const int *codes, const int codesSize,
WordsPriorityQueue *queue) {
const int *yCoordinates, const int *codes, const int inputLength,
WordsPriorityQueue *queue, Correction *correction) {
if (DEBUG_DICT) {
LOGI("initSuggest");
}
proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates);
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
if (queue) {
queue->clear();
}
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
static const char QUOTE = '\'';
@ -260,19 +264,19 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueuePool *queuePool) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue);
if (DEBUG_DICT) assert(codesSize == inputLength);
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth);
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue);
initSuggestions(
proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
}
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueue *queue) {
const int inputLength, Correction *correction, WordsPriorityQueue *queue,
const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
doAutoCompletion, maxErrors);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
@ -306,9 +310,6 @@ void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction,
WordsPriorityQueuePool* queuePool) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
correction, queuePool);
@ -318,9 +319,6 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
correction, queuePool);
@ -362,6 +360,15 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
return;
const int newWordLength = firstWordLength + secondWordLength + 1;
// Space proximity preparation
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
//correction);
//getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
//MAX_ERRORS_FOR_TWO_WORDS);
// Allocating variable length array on stack
unsigned short word[newWordLength];
const int firstFreq = getMostFrequentWordLike(
@ -387,6 +394,13 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
word[i] = mWord[i - firstWordLength - 1];
}
// TODO: Remove initSuggestions and correction->setCorrectionParams
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
0 /* do not clear queue */, correction);
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);

View File

@ -90,13 +90,13 @@ private:
WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
WordsPriorityQueue *queue);
WordsPriorityQueue *queue, Correction *correction);
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueue* queue);
WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,