From 7409d151a18e64450f40e9d798fb3d28ef4a4d76 Mon Sep 17 00:00:00 2001 From: satok Date: Thu, 26 Jan 2012 16:13:25 +0900 Subject: [PATCH] Refactor words priority queue Change-Id: I14b7ef39263ad2b1d5ec087bc80b7b8d7c30abe7 --- native/src/defines.h | 1 + native/src/unigram_dictionary.cpp | 38 +++++++++++++++++++------- native/src/unigram_dictionary.h | 6 ++++ native/src/words_priority_queue_pool.h | 37 ++++++++++++------------- 4 files changed, 52 insertions(+), 30 deletions(-) diff --git a/native/src/defines.h b/native/src/defines.h index 9c2d08777..7e171acfd 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -217,6 +217,7 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 +#define SUB_QUEUE_MAX_WORD_INDEX 2 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 6a8973761..0c738e041 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -260,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = queuePool->getSubQueue1(i); + WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord* sw = queue->top(); const int score = sw->mScore; @@ -395,11 +395,8 @@ inline void UnigramDictionary::onTerminal(const int freq, // or more length. 
if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { WordsPriorityQueue *subQueue; - if (currentWordIndex == 1) { - subQueue = queuePool->getSubQueue1(inputIndex); - } else if (currentWordIndex == 2) { - subQueue = queuePool->getSubQueue2(inputIndex); - } else { + subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex); + if (!subQueue) { return; } const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, @@ -408,6 +405,25 @@ inline void UnigramDictionary::onTerminal(const int freq, } } +int UnigramDictionary::getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, const Correction *correction, + WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate, + const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) { +// under construction +// unsigned short* tempOutputWord = 0; +// int tempOutputWordLength = 0; +// int freq = getMostFrequentWordLike( +// inputWordStartPos, inputWordLength, proximityInfo, mWord); +// if (freq > 0) { +// tempOutputWordLength = inputWordLength; +// tempOutputWord = mWord; +// } else if (!hasAutoCorrectionCandidate) { +// } + return 0; +} + void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, @@ -439,7 +455,8 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo firstOutputWordLength = firstInputWordLength; firstOutputWord = mWord; } else if (!hasAutoCorrectionCandidate) { - WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); + WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue( + FIRST_WORD_INDEX, 
firstInputWordLength); if (!firstWordQueue || firstWordQueue->size() < 1) { return; } @@ -497,16 +514,17 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo const int offset = secondInputWordStartPos; initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction); - queuePool->clearSubQueue2(); + queuePool->clearSubQueue(SECOND_WORD_INDEX); getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction, queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX); if (DEBUG_DICT) { AKLOGI("Dump second word candidates %d", secondInputWordLength); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - queuePool->getSubQueue2(i)->dumpTopWord(); + queuePool->getSubQueue(SECOND_WORD_INDEX, i)->dumpTopWord(); } } - WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength); + WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue( + SECOND_WORD_INDEX, secondInputWordLength); if (!secondWordQueue || secondWordQueue->size() < 1) { return; } diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 0b8271954..f81524bbc 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -127,6 +127,12 @@ class UnigramDictionary { ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, short unsigned int *outWord); + int getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, const Correction *correction, + WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate, + const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength); const uint8_t* const DICT_ROOT; const int 
MAX_WORD_LENGTH; diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h index 599b89711..a4aa8b6ca 100644 --- a/native/src/words_priority_queue_pool.h +++ b/native/src/words_priority_queue_pool.h @@ -43,25 +43,24 @@ class WordsPriorityQueuePool { return mMasterQueue; } - // TODO: Come up with more generic pool - WordsPriorityQueue* getSubQueue1(const int id) { - if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { + WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { + if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) { + return 0; + } + if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) { if (DEBUG_WORDS_PRIORITY_QUEUE) { assert(false); } return 0; } - return mSubQueues1[id]; - } - - WordsPriorityQueue* getSubQueue2(const int id) { - if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { - if (DEBUG_WORDS_PRIORITY_QUEUE) { - assert(false); - } + // TODO: Come up with more generic pool + if (wordIndex == 1) { + return mSubQueues1[inputWordLength]; + } else if (wordIndex == 2) { + return mSubQueues2[inputWordLength]; + } else { return 0; } - return mSubQueues2[id]; } inline void clearAll() { @@ -72,15 +71,13 @@ class WordsPriorityQueuePool { } } - inline void clearSubQueue1() { + inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - mSubQueues1[i]->clear(); - } - } - - inline void clearSubQueue2() { - for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - mSubQueues2[i]->clear(); + if (wordIndex == 1) { + mSubQueues1[i]->clear(); + } else if (wordIndex == 2) { + mSubQueues2[i]->clear(); + } } }