am d8096b1a: am 6cbe204f: Fix the performance issue on suggesting aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

* commit 'd8096b1a12ce1f6f53a2b269956043d77276f007':
  Fix the performance issue on suggesting aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
This commit is contained in:
Satoshi Kataoka 2012-05-30 02:52:42 -07:00 committed by Android Git Automerger
commit 023ca7609e
5 changed files with 66 additions and 15 deletions

View file

@ -110,6 +110,10 @@ Correction::Correction(const int typedLetterMultiplier, const int fullWordMultip
initEditDistance(mEditDistanceTable); initEditDistance(mEditDistanceTable);
} }
void Correction::resetCorrection() {
mTotalTraverseCount = 0;
}
void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
const int maxDepth) { const int maxDepth) {
mProximityInfo = pi; mProximityInfo = pi;

View file

@ -94,6 +94,7 @@ class Correction {
} }
Correction(const int typedLetterMultiplier, const int fullWordMultiplier); Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
void resetCorrection();
void initCorrection( void initCorrection(
const ProximityInfo *pi, const int inputLength, const int maxWordLength); const ProximityInfo *pi, const int inputLength, const int maxWordLength);
void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
@ -129,6 +130,10 @@ class Correction {
bool needsToPrune() const; bool needsToPrune() const;
// Bumps the traverse counter by one and returns the updated value.
//
// The counter is stored in an 8-bit unsigned member, so a plain increment
// would wrap from 255 back to 0. Callers compare the returned value against a
// traverse limit and keep calling after the limit has tripped; a wrap would
// make the count look small again and re-enable traversal. The counter
// therefore saturates at 255 instead of wrapping.
int pushAndGetTotalTraverseCount() {
    if (mTotalTraverseCount < 0xFF) {
        ++mTotalTraverseCount;
    }
    return mTotalTraverseCount;
}
int getFreqForSplitMultipleWords( int getFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount, const int *freqArray, const int *wordLengthArray, const int wordCount,
const bool isSpaceProximity, const unsigned short *word); const bool isSpaceProximity, const unsigned short *word);
@ -200,6 +205,8 @@ class Correction {
int mTerminalOutputIndex; int mTerminalOutputIndex;
int mMaxErrors; int mMaxErrors;
uint8_t mTotalTraverseCount;
// The following arrays are state buffer. // The following arrays are state buffer.
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
int mDistances[MAX_WORD_LENGTH_INTERNAL]; int mDistances[MAX_WORD_LENGTH_INTERNAL];

View file

@ -24,6 +24,7 @@
#define AKLOGI ALOGI #define AKLOGI ALOGI
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0)
static inline void dumpWord(const unsigned short* word, const int length) { static inline void dumpWord(const unsigned short* word, const int length) {
static char charBuf[50]; static char charBuf[50];
@ -35,10 +36,21 @@ static inline void dumpWord(const unsigned short* word, const int length) {
AKLOGI("[ %s ]", charBuf); AKLOGI("[ %s ]", charBuf);
} }
// Debug helper: logs the first characters of an int-coded word.
//
// word   - array of code points; each one is narrowed to a char for logging.
// length - number of entries in word to print.
//
// The scratch buffer is fixed-size, so the copied length is clamped to the
// buffer capacity (leaving room for the terminator). A negative length is
// treated as empty instead of writing before the start of the buffer.
static inline void dumpWordInt(const int* word, const int length) {
    static char charBuf[50];
    const int capacity = static_cast<int>(sizeof(charBuf)) - 1;
    int count = length;
    if (count < 0) {
        count = 0;
    } else if (count > capacity) {
        count = capacity;
    }
    for (int i = 0; i < count; ++i) {
        charBuf[i] = static_cast<char>(word[i]);
    }
    charBuf[count] = 0;
    AKLOGI("i[ %s ]", charBuf);
}
#else #else
#define AKLOGE(fmt, ...) #define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...) #define AKLOGI(fmt, ...)
#define DUMP_WORD(word, length) #define DUMP_WORD(word, length)
#define DUMP_WORD_INT(word, length)
#endif #endif
#ifdef FLAG_DO_PROFILE #ifdef FLAG_DO_PROFILE
@ -223,6 +235,10 @@ static inline void prof_out(void) {
#define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MAX_COUNT 10
#define SUB_QUEUE_MIN_WORD_LENGTH 4 #define SUB_QUEUE_MIN_WORD_LENGTH 4
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10
// Longest input sub-word (in code points) the multi-word suggestion search
// will try; the split loop stops once a candidate sub-word exceeds this.
// TODO: Remove this limitation
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH 12
// Maximum number of candidate traversals allowed per lookup; once the running
// count exceeds this, further traversals return immediately.
// NOTE: the count is held in a uint8_t, so this value must stay below 256.
// TODO: Remove this limitation
#define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 110
#define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MULTIPLE_WORDS_DEMOTION_RATE 80
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6

View file

@ -177,6 +177,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
queuePool->clearAll(); queuePool->clearAll();
Correction* masterCorrection = correction; Correction* masterCorrection = correction;
correction->resetCorrection();
if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS) if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
{ // Incrementally tune the word and try all possibilities { // Incrementally tune the word and try all possibilities
int codesBuffer[getCodesBufferSize(codes, codesSize)]; int codesBuffer[getCodesBufferSize(codes, codesSize)];
@ -302,6 +303,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) { const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("initSuggest"); AKLOGI("initSuggest");
DUMP_WORD_INT(codes, inputLength);
} }
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
@ -325,6 +327,16 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool *queuePool, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) { const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount();
if (DEBUG_DICT) {
AKLOGI("Traverse count %d", totalTraverseCount);
}
if (totalTraverseCount > MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT) {
if (DEBUG_DICT) {
AKLOGI("Abort traversing %d", totalTraverseCount);
}
return;
}
// TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
@ -411,7 +423,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
} }
} }
bool UnigramDictionary::getSubStringSuggestion( int UnigramDictionary::getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction, const int *codes, const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool* queuePool, const int inputLength, WordsPriorityQueuePool* queuePool, const int inputLength,
@ -450,8 +462,9 @@ bool UnigramDictionary::getSubStringSuggestion(
} }
} }
WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
if (!queue || queue->size() < 1) { // TODO: Return the correct value depending on doAutoCompletion
return false; if (!queue || queue->size() <= 0) {
return FLAG_MULTIPLE_SUGGEST_ABORT;
} }
int score = 0; int score = 0;
const float ns = queue->getHighestNormalizedScore( const float ns = queue->getHighestNormalizedScore(
@ -464,7 +477,7 @@ bool UnigramDictionary::getSubStringSuggestion(
// threshold. // threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|| nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
return false; return FLAG_MULTIPLE_SUGGEST_SKIP;
} }
freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
} }
@ -475,7 +488,7 @@ bool UnigramDictionary::getSubStringSuggestion(
} }
if (freq <= 0 || nextWordLength <= 0 if (freq <= 0 || nextWordLength <= 0
|| MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) { || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
return false; return FLAG_MULTIPLE_SUGGEST_SKIP;
} }
for (int i = 0; i < nextWordLength; ++i) { for (int i = 0; i < nextWordLength; ++i) {
outputWord[outputWordStartPos + i] = tempOutputWord[i]; outputWord[outputWordStartPos + i] = tempOutputWord[i];
@ -492,7 +505,7 @@ bool UnigramDictionary::getSubStringSuggestion(
if ((inputWordStartPos + inputWordLength) < inputLength) { if ((inputWordStartPos + inputWordLength) < inputLength) {
if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
return false; return FLAG_MULTIPLE_SUGGEST_SKIP;
} }
outputWord[tempOutputWordLength] = SPACE; outputWord[tempOutputWordLength] = SPACE;
if (outputWordLength) { if (outputWordLength) {
@ -513,7 +526,7 @@ bool UnigramDictionary::getSubStringSuggestion(
} }
addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue()); addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
} }
return true; return FLAG_MULTIPLE_SUGGEST_CONTINUE;
} }
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
@ -543,11 +556,18 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
// Current word // Current word
int inputWordStartPos = startInputPos; int inputWordStartPos = startInputPos;
int inputWordLength = i - startInputPos; int inputWordLength = i - startInputPos;
if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, break;
startWordIndex, inputWordStartPos, inputWordLength, outputWordLength, }
true /* not used */, freqArray, wordLengthArray, outputWord, const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates,
&tempOutputWordLength)) { codes, useFullEditDistance, correction, queuePool, inputLength,
hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength,
outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord,
&tempOutputWordLength);
if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_ABORT) {
// TODO: break here
continue;
} else if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_SKIP) {
continue; continue;
} }
@ -558,10 +578,11 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
// Missing space // Missing space
inputWordStartPos = i; inputWordStartPos = i;
inputWordLength = inputLength - i; inputWordLength = inputLength - i;
if(!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
false /* missing space */, freqArray, wordLengthArray, outputWord, 0)) { false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
!= FLAG_MULTIPLE_SUGGEST_CONTINUE) {
getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, correction, queuePool, useFullEditDistance, inputLength, correction, queuePool,
hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1, hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1,

View file

@ -70,6 +70,9 @@ class UnigramDictionary {
static const int DEFAULT_MAX_ERRORS = 2; static const int DEFAULT_MAX_ERRORS = 2;
static const int MAX_ERRORS_FOR_TWO_WORDS = 1; static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
// Result codes returned by getSubStringSuggestion().
// ABORT: no sub-queue was available for the sub-word, or it was empty.
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
// SKIP: a candidate existed but was rejected by the score/length checks.
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
// CONTINUE: the sub-word was accepted and copied into the output word.
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
int getFrequency(const int32_t* const inWord, const int length) const; int getFrequency(const int32_t* const inWord, const int length) const;
@ -127,7 +130,7 @@ class UnigramDictionary {
ProximityInfo *proximityInfo, unsigned short *word); ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
short unsigned int *outWord); short unsigned int *outWord);
bool getSubStringSuggestion( int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction, const int *codes, const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool* queuePool, const int inputLength, WordsPriorityQueuePool* queuePool, const int inputLength,