Merge missing space and mistyped space correction algorithm
Change-Id: Idd64d38d3d29be24748f9c0359667883698a5756
main
parent
5971a0a0bb
commit
9955716d0b
|
@ -158,10 +158,10 @@ void Correction::checkState() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
int Correction::getFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray,
|
||||||
const unsigned short *word) {
|
const bool isSpaceProximity, const unsigned short *word) {
|
||||||
return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
|
return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(freqArray, wordLengthArray, this,
|
||||||
firstFreq, secondFreq, this, word);
|
isSpaceProximity, word);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
||||||
|
@ -806,21 +806,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
|
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
|
||||||
const int firstFreq, const int secondFreq, const Correction* correction,
|
const int *freqArray, const int *wordLengthArray, const Correction* correction,
|
||||||
const unsigned short *word) {
|
const bool isSpaceProximity, const unsigned short *word) {
|
||||||
const int spaceProximityPos = correction->mSpaceProximityPos;
|
const int firstFreq = freqArray[0];
|
||||||
const int missingSpacePos = correction->mMissingSpacePos;
|
const int secondFreq = freqArray[1];
|
||||||
if (DEBUG_DICT) {
|
const int firstWordLength = wordLengthArray[0];
|
||||||
int inputCount = 0;
|
const int secondWordLength = wordLengthArray[1];
|
||||||
if (spaceProximityPos >= 0) ++inputCount;
|
|
||||||
if (missingSpacePos >= 0) ++inputCount;
|
|
||||||
assert(inputCount <= 1);
|
|
||||||
}
|
|
||||||
const bool isSpaceProximity = spaceProximityPos >= 0;
|
|
||||||
const int inputLength = correction->mInputLength;
|
|
||||||
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
|
|
||||||
const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
|
|
||||||
: (inputLength - missingSpacePos);
|
|
||||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
|
|
||||||
bool firstCapitalizedWordDemotion = false;
|
bool firstCapitalizedWordDemotion = false;
|
||||||
|
|
|
@ -122,7 +122,8 @@ class Correction {
|
||||||
bool needsToPrune() const;
|
bool needsToPrune() const;
|
||||||
|
|
||||||
int getFreqForSplitTwoWords(
|
int getFreqForSplitTwoWords(
|
||||||
const int firstFreq, const int secondFreq, const unsigned short *word);
|
const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity,
|
||||||
|
const unsigned short *word);
|
||||||
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
||||||
int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
|
int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
|
||||||
const int inputLength);
|
const int inputLength);
|
||||||
|
@ -150,8 +151,9 @@ class Correction {
|
||||||
static int calculateFinalFreq(const int inputIndex, const int depth,
|
static int calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int freq, int *editDistanceTable, const Correction* correction,
|
const int freq, int *editDistanceTable, const Correction* correction,
|
||||||
const int inputLength);
|
const int inputLength);
|
||||||
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
static int calcFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray,
|
||||||
const Correction* correction, const unsigned short *word);
|
const Correction* correction, const bool isSpaceProximity,
|
||||||
|
const unsigned short *word);
|
||||||
static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
|
static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
|
||||||
const unsigned short* after, const int afterLength, const int score);
|
const unsigned short* after, const int afterLength, const int score);
|
||||||
static int editDistance(const unsigned short* before,
|
static int editDistance(const unsigned short* before,
|
||||||
|
|
|
@ -180,10 +180,9 @@ static void prof_out(void) {
|
||||||
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
|
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
|
||||||
|
|
||||||
#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
|
#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
|
||||||
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
|
|
||||||
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
||||||
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
|
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
|
||||||
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
|
#define SUGGEST_MULTIPLE_WORDS true
|
||||||
|
|
||||||
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
|
||||||
|
@ -233,7 +232,7 @@ static void prof_out(void) {
|
||||||
|
|
||||||
// Minimum suggest depth for one word for all cases except for missing space suggestions.
|
// Minimum suggest depth for one word for all cases except for missing space suggestions.
|
||||||
#define MIN_SUGGEST_DEPTH 1
|
#define MIN_SUGGEST_DEPTH 1
|
||||||
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
|
#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
|
||||||
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
|
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
|
||||||
|
|
||||||
#define min(a,b) ((a)<(b)?(a):(b))
|
#define min(a,b) ((a)<(b)?(a):(b))
|
||||||
|
|
|
@ -211,7 +211,6 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
PROF_END(3);
|
PROF_END(3);
|
||||||
|
|
||||||
PROF_START(4);
|
PROF_START(4);
|
||||||
// Note: This line is intentionally left blank
|
|
||||||
bool hasAutoCorrectionCandidate = false;
|
bool hasAutoCorrectionCandidate = false;
|
||||||
WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
|
WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
|
||||||
if (masterQueue->size() > 0) {
|
if (masterQueue->size() > 0) {
|
||||||
|
@ -222,14 +221,14 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
PROF_END(4);
|
PROF_END(4);
|
||||||
|
|
||||||
PROF_START(5);
|
PROF_START(5);
|
||||||
// Suggestions with missing space
|
// Multiple word suggestions
|
||||||
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
|
if (SUGGEST_MULTIPLE_WORDS
|
||||||
&& inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
|
&& inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
|
||||||
for (int i = 1; i < inputLength; ++i) {
|
for (int i = 1; i < inputLength; ++i) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("--- Suggest missing space characters %d", i);
|
AKLOGI("--- Suggest multiple words %d", i);
|
||||||
}
|
}
|
||||||
getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
|
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, inputLength, i, correction, queuePool,
|
useFullEditDistance, inputLength, i, correction, queuePool,
|
||||||
hasAutoCorrectionCandidate);
|
hasAutoCorrectionCandidate);
|
||||||
}
|
}
|
||||||
|
@ -237,26 +236,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
PROF_END(5);
|
PROF_END(5);
|
||||||
|
|
||||||
PROF_START(6);
|
PROF_START(6);
|
||||||
if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
|
// Note: This line is intentionally left blank
|
||||||
// The first and last "mistyped spaces" are taken care of by excessive character handling
|
|
||||||
for (int i = 1; i < inputLength - 1; ++i) {
|
|
||||||
if (DEBUG_DICT) {
|
|
||||||
AKLOGI("--- Suggest words with proximity space %d", i);
|
|
||||||
}
|
|
||||||
const int x = xcoordinates[i];
|
|
||||||
const int y = ycoordinates[i];
|
|
||||||
if (DEBUG_PROXIMITY_INFO) {
|
|
||||||
AKLOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
|
|
||||||
i, x, y, proximityInfo->hasSpaceProximity(x, y));
|
|
||||||
}
|
|
||||||
if (proximityInfo->hasSpaceProximity(x, y)) {
|
|
||||||
getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
|
|
||||||
useFullEditDistance, inputLength, i, correction, queuePool,
|
|
||||||
hasAutoCorrectionCandidate);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PROF_END(6);
|
PROF_END(6);
|
||||||
|
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
queuePool->dumpSubQueue1TopSuggestions();
|
queuePool->dumpSubQueue1TopSuggestions();
|
||||||
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
|
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
|
||||||
|
@ -337,24 +319,6 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
||||||
const int inputLength, const int missingSpacePos, Correction *correction,
|
|
||||||
WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
|
|
||||||
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
|
||||||
useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
|
|
||||||
correction, queuePool, hasAutoCorrectionCandidate);
|
|
||||||
}
|
|
||||||
|
|
||||||
void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
||||||
const int inputLength, const int spaceProximityPos, Correction *correction,
|
|
||||||
WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
|
|
||||||
getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
|
||||||
useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
|
|
||||||
correction, queuePool, hasAutoCorrectionCandidate);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminal(const int freq,
|
inline void UnigramDictionary::onTerminal(const int freq,
|
||||||
const TerminalAttributes& terminalAttributes, Correction *correction,
|
const TerminalAttributes& terminalAttributes, Correction *correction,
|
||||||
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
|
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
|
||||||
|
@ -405,15 +369,23 @@ inline void UnigramDictionary::onTerminal(const int freq,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int UnigramDictionary::getSubStringSuggestion(
|
bool UnigramDictionary::getSubStringSuggestion(
|
||||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||||
const int *codes, const bool useFullEditDistance, Correction *correction,
|
const int *codes, const bool useFullEditDistance, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool, const int inputLength,
|
WordsPriorityQueuePool* queuePool, const int inputLength,
|
||||||
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
||||||
const int inputWordStartPos, const int inputWordLength,
|
const int inputWordStartPos, const int inputWordLength,
|
||||||
const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) {
|
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
||||||
|
int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
assert(currentWordIndex >= 1);
|
||||||
|
}
|
||||||
unsigned short* tempOutputWord = 0;
|
unsigned short* tempOutputWord = 0;
|
||||||
int tempOutputWordLength = 0;
|
int tempOutputWordLength = 0;
|
||||||
|
// TODO: Optimize init suggestion
|
||||||
|
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
|
inputLength, correction);
|
||||||
|
|
||||||
int freq = getMostFrequentWordLike(
|
int freq = getMostFrequentWordLike(
|
||||||
inputWordStartPos, inputWordLength, proximityInfo, mWord);
|
inputWordStartPos, inputWordLength, proximityInfo, mWord);
|
||||||
if (freq > 0) {
|
if (freq > 0) {
|
||||||
|
@ -438,7 +410,7 @@ int UnigramDictionary::getSubStringSuggestion(
|
||||||
}
|
}
|
||||||
WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
|
WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
|
||||||
if (!queue || queue->size() < 1) {
|
if (!queue || queue->size() < 1) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
int score = 0;
|
int score = 0;
|
||||||
const double ns = queue->getHighestNormalizedScore(
|
const double ns = queue->getHighestNormalizedScore(
|
||||||
|
@ -451,93 +423,105 @@ int UnigramDictionary::getSubStringSuggestion(
|
||||||
// threshold.
|
// threshold.
|
||||||
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|
||||||
|| tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
|
|| tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
freq = score >> (tempOutputWordLength
|
freq = score >> (tempOutputWordLength
|
||||||
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
|
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
|
||||||
}
|
}
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("Freq(%d): %d", currentWordIndex, freq);
|
AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
|
||||||
|
, currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
|
||||||
}
|
}
|
||||||
if (freq <= 0 || tempOutputWordLength <= 0
|
if (freq <= 0 || tempOutputWordLength <= 0
|
||||||
|| MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
|
|| MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < tempOutputWordLength; ++i) {
|
for (int i = 0; i < tempOutputWordLength; ++i) {
|
||||||
outputWord[outputWordStartPos + i] = tempOutputWord[i];
|
outputWord[outputWordStartPos + i] = tempOutputWord[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Put output values
|
||||||
|
freqArray[currentWordIndex - 1] = freq;
|
||||||
|
// TODO: put output length instead of input length
|
||||||
|
wordLengthArray[currentWordIndex - 1] = inputWordLength;
|
||||||
|
*outputWordLength = outputWordStartPos + tempOutputWordLength;
|
||||||
|
|
||||||
if ((inputWordStartPos + inputWordLength) < inputLength) {
|
if ((inputWordStartPos + inputWordLength) < inputLength) {
|
||||||
if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
|
if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
|
outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
|
||||||
++tempOutputWordLength;
|
++*outputWordLength;
|
||||||
|
} else if (currentWordIndex >= 2) {
|
||||||
|
// TODO: Handle 3 or more words
|
||||||
|
const int pairFreq = correction->getFreqForSplitTwoWords(
|
||||||
|
freqArray, wordLengthArray, isSpaceProximity, outputWord);
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
|
||||||
|
inputLength);
|
||||||
}
|
}
|
||||||
*outputWordLength = outputWordStartPos + tempOutputWordLength;
|
addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
|
||||||
return freq;
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
|
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||||
const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
|
const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
|
||||||
const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
|
Correction *correction, WordsPriorityQueuePool* queuePool,
|
||||||
const bool hasAutoCorrectionCandidate) {
|
const bool hasAutoCorrectionCandidate) {
|
||||||
if (inputLength >= MAX_WORD_LENGTH) return;
|
if (inputLength >= MAX_WORD_LENGTH) return;
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
int inputCount = 0;
|
|
||||||
if (spaceProximityPos >= 0) ++inputCount;
|
|
||||||
if (missingSpacePos >= 0) ++inputCount;
|
|
||||||
assert(inputCount <= 1);
|
|
||||||
// MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
|
// MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
|
||||||
assert(MAX_PROXIMITY_CHARS == 16);
|
assert(MAX_PROXIMITY_CHARS == 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
|
||||||
inputLength, correction);
|
|
||||||
|
|
||||||
// Allocating fixed length array on stack
|
// Allocating fixed length array on stack
|
||||||
unsigned short outputWord[MAX_WORD_LENGTH];
|
unsigned short outputWord[MAX_WORD_LENGTH];
|
||||||
|
int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
|
||||||
|
int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
|
||||||
int outputWordLength = 0;
|
int outputWordLength = 0;
|
||||||
|
|
||||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
|
||||||
const bool isSpaceProximity = spaceProximityPos >= 0;
|
|
||||||
|
|
||||||
// First word
|
// First word
|
||||||
int inputWordStartPos = 0;
|
int inputWordStartPos = 0;
|
||||||
int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
|
int inputWordLength = wordDivideIndex;
|
||||||
const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
||||||
FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength);
|
FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
|
||||||
if (firstFreq <= 0) {
|
freqArray, wordLengthArray, outputWord, &outputWordLength)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const int tempOutputWordLength = outputWordLength;
|
||||||
// Second word
|
// Second word
|
||||||
inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
|
// Missing space
|
||||||
inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
|
inputWordStartPos = wordDivideIndex;
|
||||||
: (inputLength - missingSpacePos);
|
inputWordLength = inputLength - wordDivideIndex;
|
||||||
const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
||||||
SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord,
|
SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
|
||||||
&outputWordLength);
|
false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
|
||||||
if (secondFreq <= 0) {
|
|
||||||
|
// Mistyped space
|
||||||
|
++inputWordStartPos;
|
||||||
|
--inputWordLength;
|
||||||
|
|
||||||
|
if (inputWordLength <= 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove initSuggestions and correction->setCorrectionParams
|
const int x = xcoordinates[inputWordStartPos - 1];
|
||||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
|
const int y = ycoordinates[inputWordStartPos - 1];
|
||||||
|
if (!proximityInfo->hasSpaceProximity(x, y)) {
|
||||||
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
|
||||||
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
|
|
||||||
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
|
|
||||||
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
|
|
||||||
if (DEBUG_DICT) {
|
|
||||||
AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
|
|
||||||
}
|
|
||||||
addWord(outputWord, outputWordLength, pairFreq, masterQueue);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
|
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
||||||
|
SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
|
||||||
|
true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
|
||||||
|
}
|
||||||
|
|
||||||
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
|
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
|
||||||
// interface.
|
// interface.
|
||||||
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
||||||
|
|
|
@ -103,17 +103,9 @@ class UnigramDictionary {
|
||||||
const int currentWordIndex);
|
const int currentWordIndex);
|
||||||
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
|
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||||
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
|
const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
|
||||||
const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool,
|
Correction *correction, WordsPriorityQueuePool* queuePool,
|
||||||
const bool hasAutoCorrectionCandidate);
|
const bool hasAutoCorrectionCandidate);
|
||||||
void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
||||||
const int inputLength, const int missingSpacePos, Correction *correction,
|
|
||||||
WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
|
|
||||||
void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
||||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
||||||
const int inputLength, const int spaceProximityPos, Correction *correction,
|
|
||||||
WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
|
|
||||||
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
|
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
|
||||||
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
|
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
|
||||||
const int currentWordIndex);
|
const int currentWordIndex);
|
||||||
|
@ -127,13 +119,14 @@ class UnigramDictionary {
|
||||||
ProximityInfo *proximityInfo, unsigned short *word);
|
ProximityInfo *proximityInfo, unsigned short *word);
|
||||||
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
||||||
short unsigned int *outWord);
|
short unsigned int *outWord);
|
||||||
int getSubStringSuggestion(
|
bool getSubStringSuggestion(
|
||||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||||
const int *codes, const bool useFullEditDistance, Correction *correction,
|
const int *codes, const bool useFullEditDistance, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool, const int inputLength,
|
WordsPriorityQueuePool* queuePool, const int inputLength,
|
||||||
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
||||||
const int inputWordStartPos, const int inputWordLength,
|
const int inputWordStartPos, const int inputWordLength,
|
||||||
const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength);
|
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
||||||
|
int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);
|
||||||
|
|
||||||
const uint8_t* const DICT_ROOT;
|
const uint8_t* const DICT_ROOT;
|
||||||
const int MAX_WORD_LENGTH;
|
const int MAX_WORD_LENGTH;
|
||||||
|
|
Loading…
Reference in New Issue