From a7e5a5a6b9ed1367a4bb81c874d0fde7de85a787 Mon Sep 17 00:00:00 2001 From: satok Date: Thu, 15 Dec 2011 16:49:12 +0900 Subject: [PATCH] Add words priority queue pool Change-Id: I152df7b876a1756b69ded2ca4fb3ee26b38c971f --- native/src/defines.h | 4 ++ native/src/dictionary.cpp | 5 ++- native/src/dictionary.h | 6 +-- native/src/unigram_dictionary.cpp | 52 +++++++++++++---------- native/src/unigram_dictionary.h | 13 +++--- native/src/words_priority_queue.h | 58 ++++++++++++++------------ native/src/words_priority_queue_pool.h | 53 +++++++++++++++++++++++ 7 files changed, 131 insertions(+), 60 deletions(-) create mode 100644 native/src/words_priority_queue_pool.h diff --git a/native/src/defines.h b/native/src/defines.h index ea643bf07..e1d2ca351 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -202,6 +202,10 @@ static void dumpWord(const unsigned short* word, const int length) { // This is only used for the size of array. Not to be used in c functions. #define MAX_WORD_LENGTH_INTERNAL 48 +// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used +// for better performance. +#define SUB_QUEUE_MAX_WORDS 5 + #define MAX_DEPTH_MULTIPLIER 3 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index 55358ec81..e3673d425 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -39,7 +39,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, } } mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); - mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength); + mWordsPriorityQueuePool = new WordsPriorityQueuePool( + maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength); mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives, @@ -48,7 +49,7 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, Dictionary::~Dictionary() { delete mCorrection; - delete mWordsPriorityQueue; + delete mWordsPriorityQueuePool; delete mUnigramDictionary; delete mBigramDictionary; } diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 2a6e4e0d5..52048ecca 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -23,7 +23,7 @@ #include "defines.h" #include "proximity_info.h" #include "unigram_dictionary.h" -#include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -34,7 +34,7 @@ public: int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) { - return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueue, + return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, mCorrection, xcoordinates, ycoordinates, codes, codesSize, flags, outWords, frequencies); } @@ -81,7 +81,7 @@ private: const bool IS_LATEST_DICT_VERSION; UnigramDictionary *mUnigramDictionary; BigramDictionary *mBigramDictionary; - WordsPriorityQueue *mWordsPriorityQueue; + WordsPriorityQueuePool *mWordsPriorityQueuePool; Correction *mCorrection; }; diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index ac9f53ed2..0db33e7d8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -93,7 +93,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, - WordsPriorityQueue *queue) { + WordsPriorityQueuePool *queuePool) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { @@ -110,7 +110,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, - currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, queue); + currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, + queuePool); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. @@ -120,7 +121,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1, - codesDest + i * MAX_PROXIMITY_CHARS, correction, queue); + codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool); return; } } @@ -137,27 +138,28 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction, - queue); + queuePool); } -int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue, - Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int codesSize, const int flags, unsigned short *outWords, int *frequencies) { +int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, + WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int codesSize, const int flags, + unsigned short *outWords, int *frequencies) { - WordsPriorityQueue* masterQueue = queue; Correction* masterCorrection = correction; if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags) { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, masterQueue); + codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags, - masterCorrection, masterQueue); + masterCorrection, queuePool); } PROF_START(20); - const int suggestedWordsCount = masterQueue->outputSuggestions(frequencies, outWords); + const int suggestedWordsCount = + queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); @@ -178,11 +180,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorit void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputLength, const int flags, Correction *correction, WordsPriorityQueue *queue) { + const int inputLength, const int flags, Correction *correction, + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); PROF_OPEN; PROF_START(0); - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, queue); + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue); if (DEBUG_DICT) assert(codesSize == inputLength); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); @@ -192,7 +196,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags; // TODO: remove PROF_START(1); - getSuggestionCandidates(useFullEditDistance, inputLength, correction, queue); + getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue); PROF_END(1); PROF_START(2); @@ -216,7 +220,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, LOGI("--- Suggest missing space characters %d", i); } getMissingSpaceWords( - inputLength, i, proximityInfo, correction, useFullEditDistance, queue); + inputLength, i, proximityInfo, correction, useFullEditDistance, queuePool); } } PROF_END(5); @@ -235,8 +239,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, i, x, y, proximityInfo->hasSpaceProximity(x, y)); } if (proximityInfo->hasSpaceProximity(x, y)) { - getMistypedSpaceWords( - inputLength, i, proximityInfo, correction, useFullEditDistance, queue); + getMistypedSpaceWords(inputLength, i, proximityInfo, correction, + useFullEditDistance, queuePool); } } } @@ -293,20 +297,20 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getMissingSpaceWords( const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo, - Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { + Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); - getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); + getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool); } void UnigramDictionary::getMistypedSpaceWords( const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo, - Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { + Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); - getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); + getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool); } inline void UnigramDictionary::onTerminal( @@ -321,7 +325,9 @@ inline void UnigramDictionary::onTerminal( void UnigramDictionary::getSplitTwoWordsSuggestion( const int inputLength, ProximityInfo *proximityInfo, Correction *correction, - WordsPriorityQueue *queue) { + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + const int spaceProximityPos = correction->getSpaceProximityPos(); const int missingSpacePos = correction->getMissingSpacePos(); if (DEBUG_DICT) { @@ -373,7 +379,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } - addWord(word, newWordLength, pairFreq, queue); + addWord(word, newWordLength, pairFreq, masterQueue); return; } diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index f5cb43860..ce15cdd8f 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -23,6 +23,7 @@ #include "defines.h" #include "proximity_info.h" #include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -70,7 +71,7 @@ public: const bool isLatestDictVersion); bool isValidWord(const uint16_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue, + int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); @@ -80,13 +81,13 @@ private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, - const int flags, Correction *correction, WordsPriorityQueue *queue); + const int flags, Correction *correction, WordsPriorityQueuePool *queuePool); bool isDigraph(const int *codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, - WordsPriorityQueue* queue); + WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue); @@ -94,13 +95,13 @@ private: const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue); void getSplitTwoWordsSuggestion(const int inputLength, ProximityInfo *proximityInfo, - Correction *correction, WordsPriorityQueue *queue); + Correction *correction, WordsPriorityQueuePool *queuePool); void getMissingSpaceWords(const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo, Correction *correction, - const bool useFullEditDistance, WordsPriorityQueue *queue); + const bool useFullEditDistance, WordsPriorityQueuePool *queuePool); void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo, Correction *correction, - const bool useFullEditDistance, WordsPriorityQueue *queue); + const bool useFullEditDistance, WordsPriorityQueuePool *queuePool); void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h index 366b1b67a..a4175d3e0 100644 --- a/native/src/words_priority_queue.h +++ b/native/src/words_priority_queue.h @@ -24,7 +24,7 @@ namespace latinime { class WordsPriorityQueue { -private: +public: class SuggestedWord { public: int mScore; @@ -40,31 +40,6 @@ private: } }; - struct wordComparator { - bool operator ()(SuggestedWord * left, SuggestedWord * right) { - return left->mScore > right->mScore; - } - }; - - SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, - int wordLength) { - for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { - if (!mSuggestedWords[i].mUsed) { - mSuggestedWords[i].setParams(score, word, wordLength); - return &mSuggestedWords[i]; - } - } - return 0; - } - - typedef std::priority_queue, - wordComparator> Suggestions; - Suggestions mSuggestions; - const unsigned int MAX_WORDS; - const unsigned int MAX_WORD_LENGTH; - SuggestedWord* mSuggestedWords; - -public: WordsPriorityQueue(int maxWords, int maxWordLength) : MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( (unsigned int) maxWordLength) { @@ -105,6 +80,13 @@ public: mSuggestions.push(sw); } + SuggestedWord* topAndPop() { + if (mSuggestions.empty()) return 0; + SuggestedWord* sw = mSuggestions.top(); + mSuggestions.pop(); + return sw; + } + int outputSuggestions(int *frequencies, unsigned short *outputChars) { const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; @@ -140,6 +122,30 @@ public: mSuggestions.pop(); } } +private: + struct wordComparator { + bool operator ()(SuggestedWord * left, SuggestedWord * right) { + return left->mScore > right->mScore; + } + }; + + SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, + int wordLength) { + for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { + if (!mSuggestedWords[i].mUsed) { + mSuggestedWords[i].setParams(score, word, wordLength); + return &mSuggestedWords[i]; + } + } + return 0; + } + + typedef std::priority_queue, + wordComparator> Suggestions; + Suggestions mSuggestions; + const unsigned int MAX_WORDS; + const unsigned int MAX_WORD_LENGTH; + SuggestedWord* mSuggestedWords; }; } diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h new file mode 100644 index 000000000..d964bfc3b --- /dev/null +++ b/native/src/words_priority_queue_pool.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H +#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H + +#include "words_priority_queue.h" + +namespace latinime { + +class WordsPriorityQueuePool { +public: + WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { + mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength); + mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + } + + ~WordsPriorityQueuePool() { + delete mMasterQueue; + } + + WordsPriorityQueue* getMasterQueue() { + return mMasterQueue; + } + // TODO: Come up with more generic pool + WordsPriorityQueue* getSubQueue1() { + return mSubQueue1; + } + WordsPriorityQueue* getSubQueue2() { + return mSubQueue2; + } +private: + WordsPriorityQueue *mMasterQueue; + WordsPriorityQueue *mSubQueue1; + WordsPriorityQueue *mSubQueue2; +}; +} + +#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H