From 16379df633feaefb118257096956869edfd25a2c Mon Sep 17 00:00:00 2001 From: satok Date: Mon, 12 Dec 2011 20:53:22 +0900 Subject: [PATCH] Use priority queue for native string buffer +1 2 -6 2 Performance before ==== test finished, terminate logcat ===== (0) 100.34 (0.26%) (1) 37149.26 (95.30%) (2) 8.43 (0.02%) (3) 11.18 (0.03%) (4) 9.92 (0.03%) (5) 1330.60 (3.41%) (6) 250.46 (0.64%) (20) 12.41 (0.03%) Total 38982.50 (sum of others 38872.59) after ==== test finished, terminate logcat ===== (0) 97.65 (0.26%) (1) 35427.43 (95.32%) (2) 10.30 (0.03%) (3) 8.95 (0.02%) (4) 11.01 (0.03%) (5) 1224.67 (3.30%) (6) 243.76 (0.66%) (20) 40.91 (0.11%) Total 37167.04 (sum of others 37064.68) Change-Id: Id4d3b88a9cdef765affc52973aeac951ecc6a8ca --- native/Android.mk | 8 +- native/src/defines.h | 2 + native/src/unigram_dictionary.cpp | 93 ++++--------------- native/src/unigram_dictionary.h | 14 ++- native/src/words_priority_queue.h | 146 ++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+), 85 deletions(-) create mode 100644 native/src/words_priority_queue.h diff --git a/native/Android.mk b/native/Android.mk index d2537f055..5dcc1e578 100644 --- a/native/Android.mk +++ b/native/Android.mk @@ -46,15 +46,19 @@ LOCAL_MODULE := libjni_latinime LOCAL_MODULE_TAGS := user +# For STL +LOCAL_C_INCLUDES += external/stlport/stlport bionic +LOCAL_SHARED_LIBRARIES += libstlport + ifeq ($(FLAG_DO_PROFILE), true) $(warning Making profiling version of native library) LOCAL_CFLAGS += -DFLAG_DO_PROFILE - LOCAL_SHARED_LIBRARIES := libcutils libutils + LOCAL_SHARED_LIBRARIES += libcutils libutils else # FLAG_DO_PROFILE ifeq ($(FLAG_DBG), true) $(warning Making debug version of native library) LOCAL_CFLAGS += -DFLAG_DBG - LOCAL_SHARED_LIBRARIES := libcutils libutils + LOCAL_SHARED_LIBRARIES += libcutils libutils endif # FLAG_DBG endif # FLAG_DO_PROFILE diff --git a/native/src/defines.h b/native/src/defines.h index ef1beb92f..b59f62306 100644 --- a/native/src/defines.h +++ 
b/native/src/defines.h @@ -101,6 +101,7 @@ static void prof_out(void) { #define DEBUG_PROXIMITY_INFO true #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ true +#define DEBUG_WORDS_PRIORITY_QUEUE true #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) @@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) { #define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false +#define DEBUG_WORDS_PRIORITY_QUEUE false #define DUMP_WORD(word, length) diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 647bfde04..e17e7d07b 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -49,10 +49,12 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed LOGI("UnigramDictionary - constructor"); } mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); + mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength); } UnigramDictionary::~UnigramDictionary() { delete mCorrection; + delete mWordsPriorityQueue; } static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, @@ -88,7 +90,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) { + const int currentDepth, int* codesDest) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { @@ -105,8 +107,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, 
codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, - currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords, - frequencies); + currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. @@ -115,8 +116,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit BYTES_IN_ONE_CHAR); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, - codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, - outWords, frequencies); + codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS); return; } } @@ -132,8 +132,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest, codesSrc, remainingBytes); getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies, - flags); + (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags); } int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, @@ -144,28 +143,24 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies); + codesSize, flags, codes, codesSize, 0, codesBuffer); } else { // Normal processing - getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, - outWords, frequencies, flags); + getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags); } PROF_START(20); - // Get the word count - 
int suggestedWordsCount = 0; - while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) { - suggestedWordsCount++; - } + const int suggestedWordsCount = + mWordsPriorityQueue->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { #ifdef FLAG_DBG - short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH; + short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; - LOGI("%s %i", s, mFrequencies[j]); + LOGI("%s %i", s, frequencies[j]); #endif } } @@ -176,12 +171,12 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies, const int flags) { + const int flags) { PROF_OPEN; PROF_START(0); initSuggestions( - proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies); + proximityInfo, xcoordinates, ycoordinates, codes, codesSize); if (DEBUG_DICT) assert(codesSize == mInputLength); const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); @@ -241,71 +236,19 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, - const int *yCoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies) { + const int *yCoordinates, const int *codes, const int codesSize) { if (DEBUG_DICT) { LOGI("initSuggest"); } - mFrequencies = frequencies; - mOutputChars = outWords; mInputLength = codesSize; proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); mProximityInfo = proximityInfo; + 
mWordsPriorityQueue->clear(); } -// TODO: We need to optimize addWord by using STL or something // TODO: This needs to take an const unsigned short* and not tinker with its contents -bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { - word[length] = 0; - if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { -#ifdef FLAG_DBG - char s[length + 1]; - for (int i = 0; i <= length; i++) s[i] = word[i]; - LOGI("Found word = %s, freq = %d", s, frequency); -#endif - } - if (length > MAX_WORD_LENGTH) { - if (DEBUG_DICT) { - LOGI("Exceeded max word length."); - } - return false; - } - - // Find the right insertion point - int insertAt = 0; - while (insertAt < MAX_WORDS) { - // TODO: How should we sort words with the same frequency? - if (frequency > mFrequencies[insertAt]) { - break; - } - insertAt++; - } - if (insertAt < MAX_WORDS) { - if (DEBUG_DICT) { -#ifdef FLAG_DBG - char s[length + 1]; - for (int i = 0; i <= length; i++) s[i] = word[i]; - LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX); -#endif - } - memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), - (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), - (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); - mFrequencies[insertAt] = frequency; - memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), - (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short), - (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); - unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH; - while (length--) { - *dest++ = *word++; - } - *dest = 0; // NULL terminate - if (DEBUG_DICT) { - LOGI("Added word at %d", insertAt); - } - return true; - } - return false; +void UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { + mWordsPriorityQueue->push(frequency, word, length); } static const char QUOTE = '\''; diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 
4f4fef267..506ed62fc 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -22,6 +22,7 @@ #include "correction_state.h" #include "defines.h" #include "proximity_info.h" +#include "words_priority_queue.h" namespace latinime { @@ -73,18 +74,16 @@ public: private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies, const int flags); + const int *ycoordinates, const int *codes, const int codesSize, const int flags); bool isDigraph(const int* codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies); + const int currentDepth, int* codesDest); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies); + const int *ycoordinates, const int *codes, const int codesSize); void getSuggestionCandidates(const bool useFullEditDistance); - bool addWord(unsigned short *word, int length, int frequency); + void addWord(unsigned short *word, int length, int frequency); void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); void getMissingSpaceWords(const int inputLength, const int missingSpacePos, Correction *correction, const bool useFullEditDistance); @@ -123,8 +122,7 @@ private: }; static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[]; - int *mFrequencies; - unsigned short *mOutputChars; + WordsPriorityQueue *mWordsPriorityQueue; ProximityInfo *mProximityInfo; Correction *mCorrection; int mInputLength; diff --git 
a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
new file mode 100644
index 000000000..366b1b67a
--- /dev/null
+++ b/native/src/words_priority_queue.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_H
+
+#include <cstring>
+#include <queue>
+#include <vector>
+
+#include "defines.h"
+
+namespace latinime {
+
+/**
+ * Fixed-capacity store that retains the MAX_WORDS highest-scoring words pushed into it.
+ *
+ * Words live in a pool of SuggestedWord slots allocated once in the constructor; the
+ * priority queue only holds pointers into that pool, so push() never allocates a slot.
+ * The comparator puts the lowest score on top, which makes eviction of the weakest
+ * candidate and the "is this word good enough" test both cheap.
+ */
+class WordsPriorityQueue {
+private:
+    // One pooled entry: the score, a private copy of the word and an in-use flag.
+    class SuggestedWord {
+    public:
+        int mScore;
+        unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+        int mWordLength;
+        bool mUsed;
+
+        // Caller must ensure 0 <= wordLength <= MAX_WORD_LENGTH_INTERNAL; push() does.
+        void setParams(int score, unsigned short* word, int wordLength) {
+            mScore = score;
+            mWordLength = wordLength;
+            std::memcpy(mWord, word, sizeof(unsigned short) * wordLength);
+            mUsed = true;
+        }
+    };
+
+    // Orders the heap so that top() is the entry with the LOWEST score.
+    struct wordComparator {
+        bool operator ()(SuggestedWord * left, SuggestedWord * right) const {
+            return left->mScore > right->mScore;
+        }
+    };
+
+    typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
+            wordComparator> Suggestions;
+
+    Suggestions mSuggestions;
+    const unsigned int MAX_WORDS;
+    const unsigned int MAX_WORD_LENGTH;
+    SuggestedWord* mSuggestedWords;
+
+    // The pool is owned through a raw pointer, so copying would double-delete it.
+    // Declared but intentionally not defined.
+    WordsPriorityQueue(const WordsPriorityQueue&);
+    WordsPriorityQueue& operator=(const WordsPriorityQueue&);
+
+    // True when a word of this length fits both a pool slot and an output slot.
+    bool isStorableLength(int wordLength) const {
+        return wordLength >= 0
+                && wordLength <= MAX_WORD_LENGTH_INTERNAL
+                && static_cast<unsigned int>(wordLength) <= MAX_WORD_LENGTH;
+    }
+
+    // Claims the first unused pool slot and fills it; returns 0 when the pool is exhausted.
+    SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
+            int wordLength) {
+        // The pool holds MAX_WORDS slots (one per queue entry), so scan exactly that many.
+        for (unsigned int i = 0; i < MAX_WORDS; ++i) {
+            if (!mSuggestedWords[i].mUsed) {
+                mSuggestedWords[i].setParams(score, word, wordLength);
+                return &mSuggestedWords[i];
+            }
+        }
+        return 0;
+    }
+
+public:
+    /**
+     * @param maxWords      maximum number of words kept at once; values <= 0 keep none.
+     * @param maxWordLength number of unsigned shorts per word slot in the buffer handed to
+     *                      outputSuggestions(); longer words are rejected by push().
+     */
+    WordsPriorityQueue(int maxWords, int maxWordLength) :
+            MAX_WORDS(maxWords > 0 ? static_cast<unsigned int>(maxWords) : 0),
+            MAX_WORD_LENGTH(maxWordLength > 0 ? static_cast<unsigned int>(maxWordLength) : 0) {
+        // One pool slot per queue entry: the queue never holds more than MAX_WORDS words,
+        // so sizing the pool by anything else either wastes slots or starves push().
+        mSuggestedWords = new SuggestedWord[MAX_WORDS];
+        for (unsigned int i = 0; i < MAX_WORDS; ++i) {
+            mSuggestedWords[i].mUsed = false;
+        }
+    }
+
+    ~WordsPriorityQueue() {
+        delete[] mSuggestedWords;
+    }
+
+    /**
+     * Offers a word to the queue. The word is copied, so the caller keeps ownership.
+     * When the queue is full the word replaces the current lowest-scoring entry only if
+     * its score is strictly higher; otherwise it is ignored.
+     */
+    void push(int score, unsigned short* word, int wordLength) {
+        if (MAX_WORDS == 0 || word == 0 || !isStorableLength(wordLength)) {
+            // Nothing can be stored, or copying would overrun a pool/output slot.
+            if (DEBUG_WORDS_PRIORITY_QUEUE) {
+                LOGI("Rejected word. %d, %d", score, wordLength);
+            }
+            return;
+        }
+        SuggestedWord* sw = 0;
+        if (mSuggestions.size() >= MAX_WORDS) {
+            sw = mSuggestions.top();
+            if (sw->mScore >= score) {
+                return;
+            }
+            // Evict the weakest entry and recycle its slot for the new word.
+            mSuggestions.pop();
+            sw->setParams(score, word, wordLength);
+        } else {
+            sw = getFreeSuggestedWord(score, word, wordLength);
+        }
+        if (sw == 0) {
+            LOGE("SuggestedWord is accidentally null.");
+            return;
+        }
+        if (DEBUG_WORDS_PRIORITY_QUEUE) {
+            LOGI("Push word. %d, %d", score, wordLength);
+            DUMP_WORD(word, wordLength);
+        }
+        mSuggestions.push(sw);
+    }
+
+    /**
+     * Drains the queue into the caller's buffers, highest score first.
+     *
+     * @param frequencies receives one score per returned word.
+     * @param outputChars receives the words, one every MAX_WORD_LENGTH unsigned shorts;
+     *                    a word shorter than MAX_WORD_LENGTH is zero-terminated.
+     * @return number of words written. Both buffers must have room for MAX_WORDS entries.
+     */
+    int outputSuggestions(int *frequencies, unsigned short *outputChars) {
+        const unsigned int queued = static_cast<unsigned int>(mSuggestions.size());
+        const unsigned int size = queued < MAX_WORDS ? queued : MAX_WORDS;
+        // top() is the lowest score, so fill the output from the last slot backwards.
+        int index = static_cast<int>(size) - 1;
+        while (!mSuggestions.empty() && index >= 0) {
+            SuggestedWord* sw = mSuggestions.top();
+            if (DEBUG_WORDS_PRIORITY_QUEUE) {
+                LOGI("dump word. %d", sw->mScore);
+                DUMP_WORD(sw->mWord, sw->mWordLength);
+            }
+            const unsigned int wordLength = sw->mWordLength;
+            unsigned short* target = outputChars + index * MAX_WORD_LENGTH;
+            frequencies[index] = sw->mScore;
+            std::memcpy(target, sw->mWord, wordLength * sizeof(unsigned short));
+            if (wordLength < MAX_WORD_LENGTH) {
+                target[wordLength] = 0;
+            }
+            sw->mUsed = false;
+            mSuggestions.pop();
+            --index;
+        }
+        return static_cast<int>(size);
+    }
+
+    /** Empties the queue and returns every slot to the pool. */
+    void clear() {
+        while (!mSuggestions.empty()) {
+            SuggestedWord* sw = mSuggestions.top();
+            if (DEBUG_WORDS_PRIORITY_QUEUE) {
+                LOGI("Clear word. %d", sw->mScore);
+                DUMP_WORD(sw->mWord, sw->mWordLength);
+            }
+            sw->mUsed = false;
+            mSuggestions.pop();
+        }
+    }
+};
+} // namespace latinime
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_H