am 67e13976: Merge "Store suggestions for each input length for missing space algorithm etc."

* commit '67e13976b78619ec7bc45b5377a9f5da2534cfd5':
  Store suggestions for each input length for missing space algorithm etc.
main
satok 2012-01-16 00:20:06 -08:00 committed by Android Git Automerger
commit 53c4248c6f
7 changed files with 99 additions and 65 deletions

View File

@ -214,21 +214,11 @@ int Correction::goDownTree(
return mOutputIndex; return mOutputIndex;
} }
// TODO: remove
// Accessor that returns the current value of mOutputIndex.
int Correction::getOutputIndex() {
return mOutputIndex;
}
// TODO: remove // TODO: remove
int Correction::getInputIndex() { int Correction::getInputIndex() {
return mInputIndex; return mInputIndex;
} }
// TODO: remove
// Accessor that returns the current value of the mNeedsToTraverseAllNodes flag.
bool Correction::needsToTraverseAllNodes() {
return mNeedsToTraverseAllNodes;
}
void Correction::incrementInputIndex() { void Correction::incrementInputIndex() {
++mInputIndex; ++mInputIndex;
} }
@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
mWord, mOutputIndex + 1); mWord, mOutputIndex + 1);
} }
// TODO: inline?
Correction::CorrectionType Correction::processSkipChar( Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
addCharToCurrentWord(c); addCharToCurrentWord(c);
if (needsToTraverseAllNodes() && isTerminal) { mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalOutputIndex = mOutputIndex;
mTerminalOutputIndex = mOutputIndex; if (mNeedsToTraverseAllNodes && isTerminal) {
incrementOutputIndex(); incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL; return TRAVERSE_ALL_ON_TERMINAL;
} else { } else {
@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
} }
} }
Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    // The terminal indices have to be recorded before any CorrectionType is returned,
    // so snapshot the current output/input positions first and only then report UNRELATED.
    mTerminalOutputIndex = mOutputIndex;
    mTerminalInputIndex = mInputIndex;
    return UNRELATED;
}
inline bool isEquivalentChar(ProximityInfo::ProximityType type) { inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR; return type == ProximityInfo::EQUIVALENT_CHAR;
} }
@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) { if (correctionCount > mMaxErrors) {
return UNRELATED; return processUnrelatedCorrectionType();
} }
// TODO: Change the limit if we'll allow two or more corrections // TODO: Change the limit if we'll allow two or more corrections
@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c); mTransposedCount, mExcessiveCount, c);
} }
return UNRELATED; return processUnrelatedCorrectionType();
} }
} }
@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c); mTransposedCount, mExcessiveCount, c);
} }
return UNRELATED; return processUnrelatedCorrectionType();
} }
} else if (secondTransposing) { } else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no
@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} }
return ON_TERMINAL; return ON_TERMINAL;
} else { } else {
mTerminalInputIndex = mInputIndex - 1;
mTerminalOutputIndex = mOutputIndex - 1;
return NOT_ON_TERMINAL; return NOT_ON_TERMINAL;
} }
} }

View File

@ -48,7 +48,6 @@ class Correction {
void checkState(); void checkState();
bool initProcessState(const int index); bool initProcessState(const int index);
int getOutputIndex();
int getInputIndex(); int getInputIndex();
virtual ~Correction(); virtual ~Correction();
@ -115,11 +114,11 @@ class Correction {
private: private:
inline void incrementInputIndex(); inline void incrementInputIndex();
inline void incrementOutputIndex(); inline void incrementOutputIndex();
inline bool needsToTraverseAllNodes();
inline void startToTraverseAllNodes(); inline void startToTraverseAllNodes();
inline bool isQuote(const unsigned short c); inline bool isQuote(const unsigned short c);
inline CorrectionType processSkipChar( inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented); const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c); inline void addCharToCurrentWord(const int32_t c);
const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER;

View File

@ -22,9 +22,23 @@
#include <cutils/log.h> #include <cutils/log.h>
#define AKLOGE ALOGE #define AKLOGE ALOGE
#define AKLOGI ALOGI #define AKLOGI ALOGI
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
static char charBuf[50];
// Logs `word` through AKLOGI as "[ <text> ]".
//
// word:   array of 16-bit code units; each unit is narrowed to char while it is copied
//         into the static charBuf scratch buffer.
// length: number of units to print. It is clamped to [0, sizeof(charBuf) - 1] so that
//         neither the copy loop nor the NUL terminator can write outside charBuf; a
//         longer word is logged truncated and a negative length logs an empty word.
static void dumpWord(const unsigned short* word, const int length) {
    const int capacity = static_cast<int>(sizeof(charBuf)) - 1;
    const int safeLength = length < 0 ? 0 : (length > capacity ? capacity : length);
    for (int i = 0; i < safeLength; ++i) {
        charBuf[i] = static_cast<char>(word[i]);
    }
    charBuf[safeLength] = 0;
    AKLOGI("[ %s ]", charBuf);
}
#else #else
#define AKLOGE(fmt, ...) #define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...) #define AKLOGI(fmt, ...)
#define DUMP_WORD(word, length)
#endif #endif
#ifdef FLAG_DO_PROFILE #ifdef FLAG_DO_PROFILE
@ -106,18 +120,6 @@ static void prof_out(void) {
#define DEBUG_CORRECTION_FREQ true #define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true #define DEBUG_WORDS_PRIORITY_QUEUE true
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
static char charBuf[50];
// Logs `word` through AKLOGI as "[ <text> ]".
//
// word:   array of 16-bit code units; each unit is narrowed to char while it is copied
//         into the static charBuf scratch buffer.
// length: number of units to print. It is clamped to [0, sizeof(charBuf) - 1] so that
//         neither the copy loop nor the NUL terminator can write outside charBuf; a
//         longer word is logged truncated and a negative length logs an empty word.
static void dumpWord(const unsigned short* word, const int length) {
    const int capacity = static_cast<int>(sizeof(charBuf)) - 1;
    const int safeLength = length < 0 ? 0 : (length > capacity ? capacity : length);
    for (int i = 0; i < safeLength; ++i) {
        charBuf[i] = static_cast<char>(word[i]);
    }
    charBuf[safeLength] = 0;
    AKLOGI("[ %s ]", charBuf);
}
#else // FLAG_DBG #else // FLAG_DBG
#define DEBUG_DICT false #define DEBUG_DICT false
@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_CORRECTION_FREQ false #define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false #define DEBUG_WORDS_PRIORITY_QUEUE false
#define DUMP_WORD(word, length)
#endif // FLAG_DBG #endif // FLAG_DBG
@ -207,7 +208,8 @@ static void dumpWord(const unsigned short* word, const int length) {
// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
// for better performance. // for better performance.
#define SUB_QUEUE_MAX_WORDS 5 // Holds up to 1 candidate for each word
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MAX_COUNT 10
#define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3

View File

@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_OPEN; PROF_OPEN;
PROF_START(0); PROF_START(0);
// Note: This line is intentionally left blank queuePool->clearAll();
PROF_END(0); PROF_END(0);
PROF_START(1); PROF_START(1);
@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
} }
} }
PROF_END(6); PROF_END(6);
if (DEBUG_WORDS_PRIORITY_QUEUE) {
queuePool->dumpSubQueue1TopSuggestions();
}
} }
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
const int *yCoordinates, const int *codes, const int inputLength, const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
WordsPriorityQueue *queue, Correction *correction) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("initSuggest"); AKLOGI("initSuggest");
} }
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
if (queue) {
queue->clear();
}
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth); correction->initCorrection(proximityInfo, inputLength, maxDepth);
} }
@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueuePool *queuePool) { WordsPriorityQueuePool *queuePool) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
initSuggestions( getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
} }
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueue *queue, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors) { const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0,
@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
int firstChildPos; int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
correction, &childCount, &firstChildPos, &siblingPos, queue); correction, &childCount, &firstChildPos, &siblingPos, queuePool);
// Update next sibling pos // Update next sibling pos
correction->setTreeSiblingPos(outputIndex, siblingPos); correction->setTreeSiblingPos(outputIndex, siblingPos);
@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
inline void UnigramDictionary::onTerminal(const int freq, inline void UnigramDictionary::onTerminal(const int freq,
const TerminalAttributes& terminalAttributes, Correction *correction, const TerminalAttributes& terminalAttributes, Correction *correction,
WordsPriorityQueue *queue) { WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
const int inputIndex = correction->getInputIndex();
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
if (!addToMasterQueue && !addToSubQueue) {
return;
}
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
int wordLength; int wordLength;
unsigned short* wordPointer; unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) { if (finalFreq >= 0) {
if (!terminalAttributes.isShortcutOnly()) { if (!terminalAttributes.isShortcutOnly()) {
addWord(wordPointer, wordLength, finalFreq, queue); if (addToMasterQueue) {
addWord(wordPointer, wordLength, finalFreq, masterQueue);
}
// TODO: Check the validity of "inputIndex == wordLength"
//if (addToSubQueue && inputIndex == wordLength) {
if (addToSubQueue) {
addWord(wordPointer, wordLength, finalFreq, subQueue);
}
} }
// Please note that the shortcut candidates will be added to the master queue only.
if (!addToMasterQueue) {
return;
}
// From here, below is the code to add shortcut candidates.
TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
while (iterator.hasNextShortcutTarget()) { while (iterator.hasNextShortcutTarget()) {
// TODO: addWord only supports weak ordering, meaning we have no means to control the // TODO: addWord only supports weak ordering, meaning we have no means to control the
@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget); MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
} }
} }
} }
@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
} }
// TODO: Remove initSuggestions and correction->setCorrectionParams // TODO: Remove initSuggestions and correction->setCorrectionParams
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
0 /* do not clear queue */, correction);
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos, -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
@ -584,7 +600,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent. // given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos, inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount, Correction *correction, int *newCount,
int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) { int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
correction->checkState(); correction->checkState();
} }
@ -659,15 +675,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
} while (NOT_A_CHARACTER != c); } while (NOT_A_CHARACTER != c);
if (isTerminalNode) { if (isTerminalNode) {
if (needsToInvokeOnTerminal) { // The frequency should be here, because we come here only if this is actually
// The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char.
// a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);
onTerminal(freq, terminalAttributes, correction, queue);
}
// If there are more chars in this node, then this virtual node has children. // If there are more chars in this node, then this virtual node has children.
// If we are on the last char, this virtual node has children if this node has. // If we are on the last char, this virtual node has children if this node has.

View File

@ -93,14 +93,13 @@ class UnigramDictionary {
const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
WordsPriorityQueue *queue, Correction *correction);
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates( void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors); WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@ -114,12 +113,12 @@ class UnigramDictionary {
const int inputLength, const int spaceProximityPos, Correction *correction, const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool);
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
Correction *correction, WordsPriorityQueue *queue); Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue); int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
ProximityInfo *proximityInfo, unsigned short *word); ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,

View File

@ -128,6 +128,13 @@ class WordsPriorityQueue {
} }
} }
// Debug helper: hands the word at the top of mSuggestions to DUMP_WORD.
// Does nothing when size() is not positive.
void dumpTopWord() {
    if (size() > 0) {
        DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
    }
}
private: private:
struct wordComparator { struct wordComparator {
bool operator ()(SuggestedWord * left, SuggestedWord * right) { bool operator ()(SuggestedWord * left, SuggestedWord * right) {

View File

@ -58,6 +58,21 @@ class WordsPriorityQueuePool {
return mSubQueues2[id]; return mSubQueues2[id];
} }
inline void clearAll() {
mMasterQueue->clear();
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
mSubQueues1[i]->clear();
mSubQueues2[i]->clear();
}
}
// Debug helper: logs a header line, then asks every queue in mSubQueues1 (slots
// 0 .. SUB_QUEUE_MAX_COUNT - 1, in order) to dump its top word.
void dumpSubQueue1TopSuggestions() {
    AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
    int queueId = 0;
    while (queueId < SUB_QUEUE_MAX_COUNT) {
        mSubQueues1[queueId]->dumpTopWord();
        ++queueId;
    }
}
private: private:
WordsPriorityQueue* mMasterQueue; WordsPriorityQueue* mMasterQueue;
WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT]; WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];