Merge "Two words error correction with other error correction for the first word"
commit
2010130e44
|
@ -39,15 +39,15 @@ inline static void initEditDistance(int *editDistanceTable) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
|
inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
|
||||||
const int outputLength) {
|
const int editDistanceTableWidth, const int outputLength) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("EditDistanceTable");
|
AKLOGI("EditDistanceTable");
|
||||||
for (int i = 0; i <= 10; ++i) {
|
for (int i = 0; i <= 10; ++i) {
|
||||||
int c[11];
|
int c[11];
|
||||||
for (int j = 0; j <= 10; ++j) {
|
for (int j = 0; j <= 10; ++j) {
|
||||||
if (j < inputLength + 1 && i < outputLength + 1) {
|
if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
|
||||||
c[j] = (editDistanceTable + i * (inputLength + 1))[j];
|
c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
|
||||||
} else {
|
} else {
|
||||||
c[j] = -1;
|
c[j] = -1;
|
||||||
}
|
}
|
||||||
|
@ -81,12 +81,12 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static int getCurrentEditDistance(
|
inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
|
||||||
int *editDistanceTable, const int inputLength, const int outputLength) {
|
const int outputLength, const int inputLength) {
|
||||||
if (DEBUG_EDIT_DISTANCE) {
|
if (DEBUG_EDIT_DISTANCE) {
|
||||||
AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
|
AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
|
||||||
}
|
}
|
||||||
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
|
return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
@ -165,6 +165,16 @@ int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFre
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
|
||||||
|
return getFinalFreqInternal(freq, word, wordLength, mInputLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength,
|
||||||
|
const int inputLength) {
|
||||||
|
return getFinalFreqInternal(freq, word, wordLength, inputLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength,
|
||||||
|
const int inputLength) {
|
||||||
const int outputIndex = mTerminalOutputIndex;
|
const int outputIndex = mTerminalOutputIndex;
|
||||||
const int inputIndex = mTerminalInputIndex;
|
const int inputIndex = mTerminalInputIndex;
|
||||||
*wordLength = outputIndex + 1;
|
*wordLength = outputIndex + 1;
|
||||||
|
@ -173,8 +183,9 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
|
||||||
}
|
}
|
||||||
|
|
||||||
*word = mWord;
|
*word = mWord;
|
||||||
return Correction::RankingAlgorithm::calculateFinalFreq(
|
int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq(
|
||||||
inputIndex, outputIndex, freq, mEditDistanceTable, this);
|
inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength);
|
||||||
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Correction::initProcessState(const int outputIndex) {
|
bool Correction::initProcessState(const int outputIndex) {
|
||||||
|
@ -613,9 +624,9 @@ inline static bool isUpperCase(unsigned short c) {
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
|
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
|
||||||
const int freq, int* editDistanceTable, const Correction* correction) {
|
const int freq, int* editDistanceTable, const Correction* correction,
|
||||||
|
const int inputLength) {
|
||||||
const int excessivePos = correction->getExcessivePos();
|
const int excessivePos = correction->getExcessivePos();
|
||||||
const int inputLength = correction->mInputLength;
|
|
||||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
|
||||||
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
const ProximityInfo *proximityInfo = correction->mProximityInfo;
|
||||||
|
@ -640,13 +651,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
const unsigned short* word = correction->mWord;
|
const unsigned short* word = correction->mWord;
|
||||||
const bool skipped = skippedCount > 0;
|
const bool skipped = skippedCount > 0;
|
||||||
|
|
||||||
const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
|
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
|
||||||
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
|
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
|
||||||
|
|
||||||
// TODO: Calculate edit distance for transposed and excessive
|
// TODO: Calculate edit distance for transposed and excessive
|
||||||
int ed = 0;
|
int ed = 0;
|
||||||
if (DEBUG_DICT_FULL) {
|
if (DEBUG_DICT_FULL) {
|
||||||
dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
|
dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
|
||||||
}
|
}
|
||||||
int adjustedProximityMatchedCount = proximityMatchedCount;
|
int adjustedProximityMatchedCount = proximityMatchedCount;
|
||||||
|
|
||||||
|
@ -654,22 +665,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
|
|
||||||
// TODO: Optimize this.
|
// TODO: Optimize this.
|
||||||
if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
|
if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
|
||||||
ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1)
|
ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
|
||||||
- transposedCount;
|
inputLength) - transposedCount;
|
||||||
|
|
||||||
const int matchWeight = powerIntCapped(typedLetterMultiplier,
|
const int matchWeight = powerIntCapped(typedLetterMultiplier,
|
||||||
max(inputLength, outputIndex + 1) - ed);
|
max(inputLength, outputLength) - ed);
|
||||||
multiplyIntCapped(matchWeight, &finalFreq);
|
multiplyIntCapped(matchWeight, &finalFreq);
|
||||||
|
|
||||||
// TODO: Demote further if there are two or more excessive chars with longer user input?
|
// TODO: Demote further if there are two or more excessive chars with longer user input?
|
||||||
if (inputLength > outputIndex + 1) {
|
if (inputLength > outputLength) {
|
||||||
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
|
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
ed = max(0, ed - quoteDiffCount);
|
ed = max(0, ed - quoteDiffCount);
|
||||||
|
|
||||||
if (transposedCount < 1) {
|
if (transposedCount < 1) {
|
||||||
if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
|
if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
|
||||||
// Promote a word with just one skipped or excessive char
|
// Promote a word with just one skipped or excessive char
|
||||||
if (sameLength) {
|
if (sameLength) {
|
||||||
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
|
||||||
|
@ -681,7 +692,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
sameLength = true;
|
sameLength = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
|
adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
|
||||||
proximityMatchedCount);
|
proximityMatchedCount);
|
||||||
} else {
|
} else {
|
||||||
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
|
||||||
|
@ -783,7 +794,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
// Promotion for an exactly matched word
|
// Promotion for an exactly matched word
|
||||||
if (ed == 0) {
|
if (ed == 0) {
|
||||||
// Full exact match
|
// Full exact match
|
||||||
if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
|
if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
|
||||||
|
&& quoteDiffCount == 0) {
|
||||||
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -828,14 +840,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG_DICT_FULL) {
|
if (DEBUG_DICT_FULL) {
|
||||||
AKLOGI("calc: %d, %d", outputIndex, sameLength);
|
AKLOGI("calc: %d, %d", outputLength, sameLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG_CORRECTION_FREQ) {
|
if (DEBUG_CORRECTION_FREQ) {
|
||||||
DUMP_WORD(correction->mWord, outputIndex + 1);
|
DUMP_WORD(correction->mWord, outputLength);
|
||||||
AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
|
AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
|
||||||
skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
|
skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded,
|
||||||
quoteDiffCount, ed, finalFreq);
|
sameLength, quoteDiffCount, ed, finalFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
|
@ -881,11 +893,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
|
||||||
if (firstWordLength == 0 || secondWordLength == 0) {
|
if (firstWordLength == 0 || secondWordLength == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
|
const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1);
|
||||||
int tempFirstFreq = firstFreq;
|
int tempFirstFreq = firstFreq;
|
||||||
multiplyRate(firstDemotionRate, &tempFirstFreq);
|
multiplyRate(firstDemotionRate, &tempFirstFreq);
|
||||||
|
|
||||||
const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
|
const int secondDemotionRate = 100
|
||||||
|
- TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1);
|
||||||
int tempSecondFreq = secondFreq;
|
int tempSecondFreq = secondFreq;
|
||||||
multiplyRate(secondDemotionRate, &tempSecondFreq);
|
multiplyRate(secondDemotionRate, &tempSecondFreq);
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,8 @@ class Correction {
|
||||||
int getFreqForSplitTwoWords(
|
int getFreqForSplitTwoWords(
|
||||||
const int firstFreq, const int secondFreq, const unsigned short *word);
|
const int firstFreq, const int secondFreq, const unsigned short *word);
|
||||||
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
||||||
|
int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
|
||||||
|
const int inputLength);
|
||||||
|
|
||||||
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
||||||
|
|
||||||
|
@ -97,7 +99,8 @@ class Correction {
|
||||||
class RankingAlgorithm {
|
class RankingAlgorithm {
|
||||||
public:
|
public:
|
||||||
static int calculateFinalFreq(const int inputIndex, const int depth,
|
static int calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int freq, int *editDistanceTable, const Correction* correction);
|
const int freq, int *editDistanceTable, const Correction* correction,
|
||||||
|
const int inputLength);
|
||||||
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
||||||
const Correction* correction, const unsigned short *word);
|
const Correction* correction, const unsigned short *word);
|
||||||
static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq,
|
static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq,
|
||||||
|
@ -122,6 +125,8 @@ class Correction {
|
||||||
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
|
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
|
||||||
inline CorrectionType processUnrelatedCorrectionType();
|
inline CorrectionType processUnrelatedCorrectionType();
|
||||||
inline void addCharToCurrentWord(const int32_t c);
|
inline void addCharToCurrentWord(const int32_t c);
|
||||||
|
inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength,
|
||||||
|
const int inputLength);
|
||||||
|
|
||||||
const int TYPED_LETTER_MULTIPLIER;
|
const int TYPED_LETTER_MULTIPLIER;
|
||||||
const int FULL_WORD_MULTIPLIER;
|
const int FULL_WORD_MULTIPLIER;
|
||||||
|
|
|
@ -187,7 +187,7 @@ static void prof_out(void) {
|
||||||
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
|
||||||
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67
|
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
|
||||||
|
@ -199,6 +199,8 @@ static void prof_out(void) {
|
||||||
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
|
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
|
||||||
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
|
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
|
||||||
#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
|
#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
|
||||||
|
#define TWO_WORDS_CORRECTION_DEMOTION_BASE 80
|
||||||
|
#define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1
|
||||||
#define ZERO_DISTANCE_PROMOTION_RATE 110
|
#define ZERO_DISTANCE_PROMOTION_RATE 110
|
||||||
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
|
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
|
||||||
#define HALF_SCORE_SQUARED_RADIUS 32.0f
|
#define HALF_SCORE_SQUARED_RADIUS 32.0f
|
||||||
|
@ -212,8 +214,10 @@ static void prof_out(void) {
|
||||||
// Holds up to 1 candidate for each word
|
// Holds up to 1 candidate for each word
|
||||||
#define SUB_QUEUE_MAX_WORDS 1
|
#define SUB_QUEUE_MAX_WORDS 1
|
||||||
#define SUB_QUEUE_MAX_COUNT 10
|
#define SUB_QUEUE_MAX_COUNT 10
|
||||||
|
#define SUB_QUEUE_MIN_WORD_LENGTH 4
|
||||||
|
|
||||||
#define TWO_WORDS_CORRECTION_THRESHOLD 0.22f
|
#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
|
||||||
|
#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22
|
||||||
|
|
||||||
#define MAX_DEPTH_MULTIPLIER 3
|
#define MAX_DEPTH_MULTIPLIER 3
|
||||||
|
|
||||||
|
|
|
@ -254,7 +254,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
|
proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
|
||||||
ns += 0;
|
ns += 0;
|
||||||
AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
|
AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
|
||||||
(ns > TWO_WORDS_CORRECTION_THRESHOLD));
|
(ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
|
||||||
DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
|
DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
|
||||||
DUMP_WORD(word, wordLength);
|
DUMP_WORD(word, wordLength);
|
||||||
}
|
}
|
||||||
|
@ -343,43 +343,45 @@ inline void UnigramDictionary::onTerminal(const int freq,
|
||||||
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
|
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
|
||||||
const int inputIndex = correction->getInputIndex();
|
const int inputIndex = correction->getInputIndex();
|
||||||
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
|
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
|
||||||
if (!addToMasterQueue && !addToSubQueue) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
|
||||||
WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
|
|
||||||
int wordLength;
|
int wordLength;
|
||||||
unsigned short* wordPointer;
|
unsigned short* wordPointer;
|
||||||
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
|
|
||||||
if (finalFreq != NOT_A_FREQUENCY) {
|
if (addToMasterQueue) {
|
||||||
if (!terminalAttributes.isShortcutOnly()) {
|
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
||||||
if (addToMasterQueue) {
|
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
|
||||||
|
if (finalFreq != NOT_A_FREQUENCY) {
|
||||||
|
if (!terminalAttributes.isShortcutOnly()) {
|
||||||
addWord(wordPointer, wordLength, finalFreq, masterQueue);
|
addWord(wordPointer, wordLength, finalFreq, masterQueue);
|
||||||
}
|
}
|
||||||
// TODO: Check the validity of "inputIndex == wordLength"
|
|
||||||
//if (addToSubQueue && inputIndex == wordLength) {
|
// Please note that the shortcut candidates will be added to the master queue only.
|
||||||
if (addToSubQueue) {
|
TerminalAttributes::ShortcutIterator iterator =
|
||||||
addWord(wordPointer, wordLength, finalFreq, subQueue);
|
terminalAttributes.getShortcutIterator();
|
||||||
|
while (iterator.hasNextShortcutTarget()) {
|
||||||
|
// TODO: addWord only supports weak ordering, meaning we have no means
|
||||||
|
// to control the order of the shortcuts relative to one another or to the word.
|
||||||
|
// We need to either modulate the frequency of each shortcut according
|
||||||
|
// to its own shortcut frequency or to make the queue
|
||||||
|
// so that the insert order is protected inside the queue for words
|
||||||
|
// with the same score.
|
||||||
|
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
||||||
|
MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
|
||||||
|
addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Please note that the shortcut candidates will be added to the master queue only.
|
}
|
||||||
if (!addToMasterQueue) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// From here, below is the code to add shortcut candidates.
|
// We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
|
||||||
TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
|
// or more length.
|
||||||
while (iterator.hasNextShortcutTarget()) {
|
if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
|
||||||
// TODO: addWord only supports weak ordering, meaning we have no means to control the
|
// TODO: Check the validity of "inputIndex == wordLength"
|
||||||
// order of the shortcuts relative to one another or to the word. We need to either
|
//if (addToSubQueue && inputIndex == wordLength) {
|
||||||
// modulate the frequency of each shortcut according to its own shortcut frequency or
|
WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
|
||||||
// to make the queue so that the insert order is protected inside the queue for words
|
const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
|
||||||
// with the same score.
|
inputIndex);
|
||||||
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
addWord(wordPointer, wordLength, finalFreq, subQueue);
|
||||||
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
|
||||||
MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
|
|
||||||
addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -397,20 +399,57 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
|
||||||
}
|
}
|
||||||
const bool isSpaceProximity = spaceProximityPos >= 0;
|
const bool isSpaceProximity = spaceProximityPos >= 0;
|
||||||
const int firstWordStartPos = 0;
|
const int firstWordStartPos = 0;
|
||||||
|
|
||||||
|
const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
|
||||||
|
int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord);
|
||||||
|
unsigned short* firstWord = 0;
|
||||||
|
int firstWordLength = 0;
|
||||||
|
if (firstFreq > 0) {
|
||||||
|
firstWordLength = firstTypedWordLength;
|
||||||
|
firstWord = mWord;
|
||||||
|
} else {
|
||||||
|
if (masterQueue->size() > 0) {
|
||||||
|
double nsForMaster = masterQueue->getHighestNormalizedScore(
|
||||||
|
proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
|
||||||
|
if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) {
|
||||||
|
// Do nothing if the highest suggestion exceeds the threshold.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength);
|
||||||
|
if (firstWordQueue->size() < 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int score = 0;
|
||||||
|
const double ns = firstWordQueue->getHighestNormalizedScore(
|
||||||
|
proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score,
|
||||||
|
&firstWordLength);
|
||||||
|
// Two words correction won't be done if the score of the first word doesn't exceed the
|
||||||
|
// threshold.
|
||||||
|
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
firstFreq = score >> (firstWordLength
|
||||||
|
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstFreq <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
|
const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
|
||||||
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
|
|
||||||
const int secondWordLength = isSpaceProximity
|
const int secondWordLength = isSpaceProximity
|
||||||
? (inputLength - spaceProximityPos - 1)
|
? (inputLength - spaceProximityPos - 1)
|
||||||
: (inputLength - missingSpacePos);
|
: (inputLength - missingSpacePos);
|
||||||
|
|
||||||
if (inputLength >= MAX_WORD_LENGTH) return;
|
if (inputLength >= MAX_WORD_LENGTH) return;
|
||||||
|
|
||||||
if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
|
if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
|
||||||
|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
|
|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const int newWordLength = firstWordLength + secondWordLength + 1;
|
const int newWordLength = firstWordLength + secondWordLength + 1;
|
||||||
|
|
||||||
|
|
||||||
// Space proximity preparation
|
// Space proximity preparation
|
||||||
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
|
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
|
||||||
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
|
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
|
||||||
|
@ -420,15 +459,12 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
|
||||||
|
|
||||||
// Allocating variable length array on stack
|
// Allocating variable length array on stack
|
||||||
unsigned short word[newWordLength];
|
unsigned short word[newWordLength];
|
||||||
const int firstFreq = getMostFrequentWordLike(
|
|
||||||
firstWordStartPos, firstWordLength, proximityInfo, mWord);
|
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("First freq: %d", firstFreq);
|
AKLOGI("First freq: %d", firstFreq);
|
||||||
}
|
}
|
||||||
if (firstFreq <= 0) return;
|
|
||||||
|
|
||||||
for (int i = 0; i < firstWordLength; ++i) {
|
for (int i = 0; i < firstWordLength; ++i) {
|
||||||
word[i] = mWord[i];
|
word[i] = firstWord[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
const int secondFreq = getMostFrequentWordLike(
|
const int secondFreq = getMostFrequentWordLike(
|
||||||
|
|
|
@ -47,6 +47,7 @@ class WordsPriorityQueue {
|
||||||
for (int i = 0; i < maxWordLength; ++i) {
|
for (int i = 0; i < maxWordLength; ++i) {
|
||||||
mSuggestedWords[i].mUsed = false;
|
mSuggestedWords[i].mUsed = false;
|
||||||
}
|
}
|
||||||
|
mHighestSuggestedWord = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
~WordsPriorityQueue() {
|
~WordsPriorityQueue() {
|
||||||
|
@ -79,6 +80,9 @@ class WordsPriorityQueue {
|
||||||
DUMP_WORD(word, wordLength);
|
DUMP_WORD(word, wordLength);
|
||||||
}
|
}
|
||||||
mSuggestions.push(sw);
|
mSuggestions.push(sw);
|
||||||
|
if (!mHighestSuggestedWord || mHighestSuggestedWord->mScore < sw->mScore) {
|
||||||
|
mHighestSuggestedWord = sw;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SuggestedWord* top() {
|
SuggestedWord* top() {
|
||||||
|
@ -88,6 +92,7 @@ class WordsPriorityQueue {
|
||||||
}
|
}
|
||||||
|
|
||||||
int outputSuggestions(int *frequencies, unsigned short *outputChars) {
|
int outputSuggestions(int *frequencies, unsigned short *outputChars) {
|
||||||
|
mHighestSuggestedWord = 0;
|
||||||
const unsigned int size = min(MAX_WORDS, mSuggestions.size());
|
const unsigned int size = min(MAX_WORDS, mSuggestions.size());
|
||||||
int index = size - 1;
|
int index = size - 1;
|
||||||
while (!mSuggestions.empty() && index >= 0) {
|
while (!mSuggestions.empty() && index >= 0) {
|
||||||
|
@ -116,6 +121,7 @@ class WordsPriorityQueue {
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
|
mHighestSuggestedWord = 0;
|
||||||
while (!mSuggestions.empty()) {
|
while (!mSuggestions.empty()) {
|
||||||
SuggestedWord* sw = mSuggestions.top();
|
SuggestedWord* sw = mSuggestions.top();
|
||||||
if (DEBUG_WORDS_PRIORITY_QUEUE) {
|
if (DEBUG_WORDS_PRIORITY_QUEUE) {
|
||||||
|
@ -134,6 +140,28 @@ class WordsPriorityQueue {
|
||||||
DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
|
DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double getHighestNormalizedScore(const unsigned short* before, const int beforeLength,
|
||||||
|
unsigned short** outWord, int *outScore, int *outLength) {
|
||||||
|
if (!mHighestSuggestedWord) {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
SuggestedWord* sw = mHighestSuggestedWord;
|
||||||
|
const int score = sw->mScore;
|
||||||
|
unsigned short* word = sw->mWord;
|
||||||
|
const int wordLength = sw->mWordLength;
|
||||||
|
if (outScore) {
|
||||||
|
*outScore = score;
|
||||||
|
}
|
||||||
|
if (outWord) {
|
||||||
|
*outWord = word;
|
||||||
|
}
|
||||||
|
if (outLength) {
|
||||||
|
*outLength = wordLength;
|
||||||
|
}
|
||||||
|
return Correction::RankingAlgorithm::calcNormalizedScore(
|
||||||
|
before, beforeLength, word, wordLength, score);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct wordComparator {
|
struct wordComparator {
|
||||||
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
|
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
|
||||||
|
@ -158,6 +186,7 @@ class WordsPriorityQueue {
|
||||||
const unsigned int MAX_WORDS;
|
const unsigned int MAX_WORDS;
|
||||||
const unsigned int MAX_WORD_LENGTH;
|
const unsigned int MAX_WORD_LENGTH;
|
||||||
SuggestedWord* mSuggestedWords;
|
SuggestedWord* mSuggestedWords;
|
||||||
|
SuggestedWord* mHighestSuggestedWord;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue