* commit 'd8096b1a12ce1f6f53a2b269956043d77276f007': Fix the performance issue on suggesting aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
This commit is contained in:
commit
023ca7609e
5 changed files with 66 additions and 15 deletions
|
@ -110,6 +110,10 @@ Correction::Correction(const int typedLetterMultiplier, const int fullWordMultip
|
||||||
initEditDistance(mEditDistanceTable);
|
initEditDistance(mEditDistanceTable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Correction::resetCorrection() {
|
||||||
|
mTotalTraverseCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
|
void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
|
||||||
const int maxDepth) {
|
const int maxDepth) {
|
||||||
mProximityInfo = pi;
|
mProximityInfo = pi;
|
||||||
|
|
|
@ -94,6 +94,7 @@ class Correction {
|
||||||
}
|
}
|
||||||
|
|
||||||
Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
|
Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
|
||||||
|
void resetCorrection();
|
||||||
void initCorrection(
|
void initCorrection(
|
||||||
const ProximityInfo *pi, const int inputLength, const int maxWordLength);
|
const ProximityInfo *pi, const int inputLength, const int maxWordLength);
|
||||||
void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
|
void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
|
||||||
|
@ -129,6 +130,10 @@ class Correction {
|
||||||
|
|
||||||
bool needsToPrune() const;
|
bool needsToPrune() const;
|
||||||
|
|
||||||
|
int pushAndGetTotalTraverseCount() {
|
||||||
|
return ++mTotalTraverseCount;
|
||||||
|
}
|
||||||
|
|
||||||
int getFreqForSplitMultipleWords(
|
int getFreqForSplitMultipleWords(
|
||||||
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
||||||
const bool isSpaceProximity, const unsigned short *word);
|
const bool isSpaceProximity, const unsigned short *word);
|
||||||
|
@ -200,6 +205,8 @@ class Correction {
|
||||||
int mTerminalOutputIndex;
|
int mTerminalOutputIndex;
|
||||||
int mMaxErrors;
|
int mMaxErrors;
|
||||||
|
|
||||||
|
uint8_t mTotalTraverseCount;
|
||||||
|
|
||||||
// The following arrays are state buffer.
|
// The following arrays are state buffer.
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mDistances[MAX_WORD_LENGTH_INTERNAL];
|
int mDistances[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#define AKLOGI ALOGI
|
#define AKLOGI ALOGI
|
||||||
|
|
||||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
|
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
|
||||||
|
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0)
|
||||||
|
|
||||||
static inline void dumpWord(const unsigned short* word, const int length) {
|
static inline void dumpWord(const unsigned short* word, const int length) {
|
||||||
static char charBuf[50];
|
static char charBuf[50];
|
||||||
|
@ -35,10 +36,21 @@ static inline void dumpWord(const unsigned short* word, const int length) {
|
||||||
AKLOGI("[ %s ]", charBuf);
|
AKLOGI("[ %s ]", charBuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Debug helper: logs the first `length` entries of `word` as a C string in
// the form "i[ ... ]" through AKLOGI.
//
// Each int entry is narrowed to a single char before logging. The scratch
// buffer has a fixed size, so at most sizeof(charBuf) - 1 entries are printed;
// longer input is truncated and a negative length is treated as empty, which
// keeps both the copy loop and the terminating NUL inside the buffer.
//
// @param word   array of character codes to print; must hold at least
//               `length` readable entries.
// @param length number of entries of `word` to print.
static inline void dumpWordInt(const int* word, const int length) {
    static char charBuf[50];

    // Reserve one slot for the terminating NUL.
    const int maxChars = static_cast<int>(sizeof(charBuf)) - 1;
    int safeLength = length;
    if (safeLength < 0) {
        safeLength = 0;
    } else if (safeLength > maxChars) {
        safeLength = maxChars;
    }

    for (int i = 0; i < safeLength; ++i) {
        charBuf[i] = word[i];
    }
    charBuf[safeLength] = 0;
    AKLOGI("i[ %s ]", charBuf);
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define AKLOGE(fmt, ...)
|
#define AKLOGE(fmt, ...)
|
||||||
#define AKLOGI(fmt, ...)
|
#define AKLOGI(fmt, ...)
|
||||||
#define DUMP_WORD(word, length)
|
#define DUMP_WORD(word, length)
|
||||||
|
#define DUMP_WORD_INT(word, length)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FLAG_DO_PROFILE
|
#ifdef FLAG_DO_PROFILE
|
||||||
|
@ -223,6 +235,10 @@ static inline void prof_out(void) {
|
||||||
#define SUB_QUEUE_MAX_COUNT 10
|
#define SUB_QUEUE_MAX_COUNT 10
|
||||||
#define SUB_QUEUE_MIN_WORD_LENGTH 4
|
#define SUB_QUEUE_MIN_WORD_LENGTH 4
|
||||||
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10
|
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10
|
||||||
|
// TODO: Remove this limitation
|
||||||
|
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH 12
|
||||||
|
// TODO: Remove this limitation
|
||||||
|
#define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 110
|
||||||
#define MULTIPLE_WORDS_DEMOTION_RATE 80
|
#define MULTIPLE_WORDS_DEMOTION_RATE 80
|
||||||
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6
|
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6
|
||||||
|
|
||||||
|
|
|
@ -177,6 +177,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||||
|
|
||||||
queuePool->clearAll();
|
queuePool->clearAll();
|
||||||
Correction* masterCorrection = correction;
|
Correction* masterCorrection = correction;
|
||||||
|
correction->resetCorrection();
|
||||||
if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
|
if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
|
||||||
{ // Incrementally tune the word and try all possibilities
|
{ // Incrementally tune the word and try all possibilities
|
||||||
int codesBuffer[getCodesBufferSize(codes, codesSize)];
|
int codesBuffer[getCodesBufferSize(codes, codesSize)];
|
||||||
|
@ -302,6 +303,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
|
||||||
const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
|
const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("initSuggest");
|
AKLOGI("initSuggest");
|
||||||
|
DUMP_WORD_INT(codes, inputLength);
|
||||||
}
|
}
|
||||||
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
|
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
|
||||||
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
|
@ -325,6 +327,16 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
||||||
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
Correction *correction, WordsPriorityQueuePool *queuePool,
|
Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||||
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
|
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
|
||||||
|
uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount();
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
AKLOGI("Traverse count %d", totalTraverseCount);
|
||||||
|
}
|
||||||
|
if (totalTraverseCount > MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT) {
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
AKLOGI("Abort traversing %d", totalTraverseCount);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
// TODO: Remove setCorrectionParams
|
// TODO: Remove setCorrectionParams
|
||||||
correction->setCorrectionParams(0, 0, 0,
|
correction->setCorrectionParams(0, 0, 0,
|
||||||
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
|
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
|
||||||
|
@ -411,7 +423,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UnigramDictionary::getSubStringSuggestion(
|
int UnigramDictionary::getSubStringSuggestion(
|
||||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||||
const int *codes, const bool useFullEditDistance, Correction *correction,
|
const int *codes, const bool useFullEditDistance, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool, const int inputLength,
|
WordsPriorityQueuePool* queuePool, const int inputLength,
|
||||||
|
@ -450,8 +462,9 @@ bool UnigramDictionary::getSubStringSuggestion(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
|
WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
|
||||||
if (!queue || queue->size() < 1) {
|
// TODO: Return the correct value depending on doAutoCompletion
|
||||||
return false;
|
if (!queue || queue->size() <= 0) {
|
||||||
|
return FLAG_MULTIPLE_SUGGEST_ABORT;
|
||||||
}
|
}
|
||||||
int score = 0;
|
int score = 0;
|
||||||
const float ns = queue->getHighestNormalizedScore(
|
const float ns = queue->getHighestNormalizedScore(
|
||||||
|
@ -464,7 +477,7 @@ bool UnigramDictionary::getSubStringSuggestion(
|
||||||
// threshold.
|
// threshold.
|
||||||
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|
||||||
|| nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
|
|| nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
|
||||||
return false;
|
return FLAG_MULTIPLE_SUGGEST_SKIP;
|
||||||
}
|
}
|
||||||
freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
|
freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
|
||||||
}
|
}
|
||||||
|
@ -475,7 +488,7 @@ bool UnigramDictionary::getSubStringSuggestion(
|
||||||
}
|
}
|
||||||
if (freq <= 0 || nextWordLength <= 0
|
if (freq <= 0 || nextWordLength <= 0
|
||||||
|| MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
|
|| MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
|
||||||
return false;
|
return FLAG_MULTIPLE_SUGGEST_SKIP;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < nextWordLength; ++i) {
|
for (int i = 0; i < nextWordLength; ++i) {
|
||||||
outputWord[outputWordStartPos + i] = tempOutputWord[i];
|
outputWord[outputWordStartPos + i] = tempOutputWord[i];
|
||||||
|
@ -492,7 +505,7 @@ bool UnigramDictionary::getSubStringSuggestion(
|
||||||
|
|
||||||
if ((inputWordStartPos + inputWordLength) < inputLength) {
|
if ((inputWordStartPos + inputWordLength) < inputLength) {
|
||||||
if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
|
if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
|
||||||
return false;
|
return FLAG_MULTIPLE_SUGGEST_SKIP;
|
||||||
}
|
}
|
||||||
outputWord[tempOutputWordLength] = SPACE;
|
outputWord[tempOutputWordLength] = SPACE;
|
||||||
if (outputWordLength) {
|
if (outputWordLength) {
|
||||||
|
@ -513,7 +526,7 @@ bool UnigramDictionary::getSubStringSuggestion(
|
||||||
}
|
}
|
||||||
addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
|
addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
|
||||||
}
|
}
|
||||||
return true;
|
return FLAG_MULTIPLE_SUGGEST_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
||||||
|
@ -543,11 +556,18 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
||||||
// Current word
|
// Current word
|
||||||
int inputWordStartPos = startInputPos;
|
int inputWordStartPos = startInputPos;
|
||||||
int inputWordLength = i - startInputPos;
|
int inputWordLength = i - startInputPos;
|
||||||
if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
|
||||||
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
break;
|
||||||
startWordIndex, inputWordStartPos, inputWordLength, outputWordLength,
|
}
|
||||||
true /* not used */, freqArray, wordLengthArray, outputWord,
|
const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates,
|
||||||
&tempOutputWordLength)) {
|
codes, useFullEditDistance, correction, queuePool, inputLength,
|
||||||
|
hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength,
|
||||||
|
outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord,
|
||||||
|
&tempOutputWordLength);
|
||||||
|
if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_ABORT) {
|
||||||
|
// TODO: break here
|
||||||
|
continue;
|
||||||
|
} else if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_SKIP) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -558,10 +578,11 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
||||||
// Missing space
|
// Missing space
|
||||||
inputWordStartPos = i;
|
inputWordStartPos = i;
|
||||||
inputWordLength = inputLength - i;
|
inputWordLength = inputLength - i;
|
||||||
if(!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
|
||||||
startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
|
startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
|
||||||
false /* missing space */, freqArray, wordLengthArray, outputWord, 0)) {
|
false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
|
||||||
|
!= FLAG_MULTIPLE_SUGGEST_CONTINUE) {
|
||||||
getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
|
getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
useFullEditDistance, inputLength, correction, queuePool,
|
useFullEditDistance, inputLength, correction, queuePool,
|
||||||
hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1,
|
hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1,
|
||||||
|
|
|
@ -70,6 +70,9 @@ class UnigramDictionary {
|
||||||
static const int DEFAULT_MAX_ERRORS = 2;
|
static const int DEFAULT_MAX_ERRORS = 2;
|
||||||
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
|
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
|
||||||
|
|
||||||
|
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
|
||||||
|
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
|
||||||
|
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
|
||||||
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
||||||
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
||||||
int getFrequency(const int32_t* const inWord, const int length) const;
|
int getFrequency(const int32_t* const inWord, const int length) const;
|
||||||
|
@ -127,7 +130,7 @@ class UnigramDictionary {
|
||||||
ProximityInfo *proximityInfo, unsigned short *word);
|
ProximityInfo *proximityInfo, unsigned short *word);
|
||||||
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
||||||
short unsigned int *outWord);
|
short unsigned int *outWord);
|
||||||
bool getSubStringSuggestion(
|
int getSubStringSuggestion(
|
||||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||||
const int *codes, const bool useFullEditDistance, Correction *correction,
|
const int *codes, const bool useFullEditDistance, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool, const int inputLength,
|
WordsPriorityQueuePool* queuePool, const int inputLength,
|
||||||
|
|
Loading…
Reference in a new issue