Prepare for proximity + two word correction No2
Change-Id: Idfa1413e853299f1db459ef07da3efa932047981
main
parent
42fcb2de64
commit
1a6da631ab
|
@ -38,8 +38,28 @@ inline static void initEditDistance(int *editDistanceTable) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Debug helper: when DEBUG_DICT is set, logs an 11 x 11 window (rows i = 0..10,
// columns j = 0..10) of the edit distance table, one LOGI line per row.
// Row i, column j is read from editDistanceTable[i * (inputLength + 1) + j].
// Cells with j > inputLength or i > outputLength are printed as -1.
// Does nothing when DEBUG_DICT is false.
inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
|
||||||
|
const int outputLength) {
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
LOGI("EditDistanceTable");
|
||||||
|
// The window size is fixed at 11 rows regardless of the actual table size.
for (int i = 0; i <= 10; ++i) {
|
||||||
|
// One row of output values; every slot is assigned in the loop below.
int c[11];
|
||||||
|
for (int j = 0; j <= 10; ++j) {
|
||||||
|
// Only read cells that lie inside the (outputLength + 1) x (inputLength + 1) area.
if (j < inputLength + 1 && i < outputLength + 1) {
|
||||||
|
c[j] = (editDistanceTable + i * (inputLength + 1))[j];
|
||||||
|
} else {
|
||||||
|
// Placeholder for cells outside the requested area.
c[j] = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
|
||||||
|
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
|
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
|
||||||
const int inputLength, const unsigned short *output, const int outputLength) {
|
const int inputLength, const unsigned short *output, const int outputLength) {
|
||||||
|
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
|
||||||
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
|
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
|
||||||
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
|
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
|
||||||
// and calculate dp[outputLength][0] ... dp[outputLength][inputLength].
|
// and calculate dp[outputLength][0] ... dp[outputLength][inputLength].
|
||||||
|
@ -62,6 +82,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
|
||||||
|
|
||||||
// Returns editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1], i.e. the
// cell at row outputLength, column inputLength when rows are (inputLength + 1) wide.
// The newer revision shown here additionally logs both lengths when DEBUG_DICT is set.
inline static int getCurrentEditDistance(
|
inline static int getCurrentEditDistance(
|
||||||
int *editDistanceTable, const int inputLength, const int outputLength) {
|
int *editDistanceTable, const int inputLength, const int outputLength) {
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
|
||||||
|
}
|
||||||
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
|
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,6 +113,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
|
||||||
mInputLength = inputLength;
|
mInputLength = inputLength;
|
||||||
mMaxDepth = maxDepth;
|
mMaxDepth = maxDepth;
|
||||||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||||
|
// TODO: This is not supposed to be required. Check what's going wrong with
|
||||||
|
// editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
|
||||||
|
initEditDistance(mEditDistanceTable);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Correction::initCorrectionState(
|
void Correction::initCorrectionState(
|
||||||
|
@ -620,6 +646,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
|
|
||||||
// TODO: Calculate edit distance for transposed and excessive
|
// TODO: Calculate edit distance for transposed and excessive
|
||||||
int ed = 0;
|
int ed = 0;
|
||||||
|
if (DEBUG_DICT_FULL) {
|
||||||
|
dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
|
||||||
|
}
|
||||||
int adjustedProximityMatchedCount = proximityMatchedCount;
|
int adjustedProximityMatchedCount = proximityMatchedCount;
|
||||||
|
|
||||||
int finalFreq = freq;
|
int finalFreq = freq;
|
||||||
|
|
|
@ -98,7 +98,7 @@ static void prof_out(void) {
|
||||||
#define DEBUG_SHOW_FOUND_WORD false
|
#define DEBUG_SHOW_FOUND_WORD false
|
||||||
#define DEBUG_NODE DEBUG_DICT_FULL
|
#define DEBUG_NODE DEBUG_DICT_FULL
|
||||||
#define DEBUG_TRACE DEBUG_DICT_FULL
|
#define DEBUG_TRACE DEBUG_DICT_FULL
|
||||||
#define DEBUG_PROXIMITY_INFO true
|
#define DEBUG_PROXIMITY_INFO false
|
||||||
#define DEBUG_CORRECTION false
|
#define DEBUG_CORRECTION false
|
||||||
#define DEBUG_CORRECTION_FREQ true
|
#define DEBUG_CORRECTION_FREQ true
|
||||||
#define DEBUG_WORDS_PRIORITY_QUEUE true
|
#define DEBUG_WORDS_PRIORITY_QUEUE true
|
||||||
|
|
|
@ -103,7 +103,7 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
|
||||||
if (x < 0 || y < 0) {
|
if (x < 0 || y < 0) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
|
LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
|
||||||
assert(true);
|
assert(false);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -243,13 +243,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepares state for a suggestion pass: logs "initSuggest" when DEBUG_DICT is set and
// hands the codes, their count and the coordinates to proximityInfo->setInputParams().
// Older revision: always calls queue->clear().
// Newer revision: clears the queue only when the pointer is non-null, then computes
// maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH) and calls
// correction->initCorrection(proximityInfo, inputLength, maxDepth).
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
|
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
|
||||||
const int *yCoordinates, const int *codes, const int codesSize,
|
const int *yCoordinates, const int *codes, const int inputLength,
|
||||||
WordsPriorityQueue *queue) {
|
WordsPriorityQueue *queue, Correction *correction) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("initSuggest");
|
LOGI("initSuggest");
|
||||||
}
|
}
|
||||||
proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates);
|
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
|
||||||
queue->clear();
|
// A null queue means "leave the caller's queue untouched".
if (queue) {
|
||||||
|
queue->clear();
|
||||||
|
}
|
||||||
|
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
|
correction->initCorrection(proximityInfo, inputLength, maxDepth);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
|
@ -260,19 +264,19 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
||||||
WordsPriorityQueuePool *queuePool) {
|
WordsPriorityQueuePool *queuePool) {
|
||||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
||||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue);
|
initSuggestions(
|
||||||
if (DEBUG_DICT) assert(codesSize == inputLength);
|
proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
|
||||||
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
|
||||||
correction->initCorrection(proximityInfo, inputLength, maxDepth);
|
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
|
||||||
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
||||||
const int inputLength, Correction *correction, WordsPriorityQueue *queue) {
|
const int inputLength, Correction *correction, WordsPriorityQueue *queue,
|
||||||
|
const bool doAutoCompletion, const int maxErrors) {
|
||||||
// TODO: Remove setCorrectionParams
|
// TODO: Remove setCorrectionParams
|
||||||
correction->setCorrectionParams(0, 0, 0,
|
correction->setCorrectionParams(0, 0, 0,
|
||||||
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
|
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
|
||||||
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
|
doAutoCompletion, maxErrors);
|
||||||
int rootPosition = ROOT_POS;
|
int rootPosition = ROOT_POS;
|
||||||
// Get the number of children of root, then increment the position
|
// Get the number of children of root, then increment the position
|
||||||
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
|
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
|
||||||
|
@ -306,9 +310,6 @@ void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
||||||
const int inputLength, const int missingSpacePos, Correction *correction,
|
const int inputLength, const int missingSpacePos, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool) {
|
WordsPriorityQueuePool* queuePool) {
|
||||||
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
|
||||||
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
|
|
||||||
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
|
|
||||||
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
|
useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
|
||||||
correction, queuePool);
|
correction, queuePool);
|
||||||
|
@ -318,9 +319,6 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
||||||
const int inputLength, const int spaceProximityPos, Correction *correction,
|
const int inputLength, const int spaceProximityPos, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool) {
|
WordsPriorityQueuePool* queuePool) {
|
||||||
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
|
||||||
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
|
|
||||||
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
|
|
||||||
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
|
useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
|
||||||
correction, queuePool);
|
correction, queuePool);
|
||||||
|
@ -362,6 +360,15 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const int newWordLength = firstWordLength + secondWordLength + 1;
|
const int newWordLength = firstWordLength + secondWordLength + 1;
|
||||||
|
|
||||||
|
|
||||||
|
// Space proximity preparation
|
||||||
|
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
|
||||||
|
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
|
||||||
|
//correction);
|
||||||
|
//getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
|
||||||
|
//MAX_ERRORS_FOR_TWO_WORDS);
|
||||||
|
|
||||||
// Allocating variable length array on stack
|
// Allocating variable length array on stack
|
||||||
unsigned short word[newWordLength];
|
unsigned short word[newWordLength];
|
||||||
const int firstFreq = getMostFrequentWordLike(
|
const int firstFreq = getMostFrequentWordLike(
|
||||||
|
@ -387,6 +394,13 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
|
||||||
word[i] = mWord[i - firstWordLength - 1];
|
word[i] = mWord[i - firstWordLength - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Remove initSuggestions and correction->setCorrectionParams
|
||||||
|
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
|
||||||
|
0 /* do not clear queue */, correction);
|
||||||
|
|
||||||
|
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
||||||
|
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
|
||||||
|
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
|
||||||
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
|
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
|
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
|
||||||
|
|
|
@ -90,13 +90,13 @@ private:
|
||||||
WordsPriorityQueuePool* queuePool);
|
WordsPriorityQueuePool* queuePool);
|
||||||
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize,
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
WordsPriorityQueue *queue);
|
WordsPriorityQueue *queue, Correction *correction);
|
||||||
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
||||||
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
|
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
|
||||||
void getSuggestionCandidates(
|
void getSuggestionCandidates(
|
||||||
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
||||||
WordsPriorityQueue* queue);
|
WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
|
||||||
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
|
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||||
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
|
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
|
||||||
|
|
Loading…
Reference in New Issue