Add words priority queue pool
Change-Id: I152df7b876a1756b69ded2ca4fb3ee26b38c971f
main
parent
4d355989bd
commit
a7e5a5a6b9
|
@ -202,6 +202,10 @@ static void dumpWord(const unsigned short* word, const int length) {
|
|||
// This is only used for the size of array. Not to be used in c functions.
|
||||
#define MAX_WORD_LENGTH_INTERNAL 48
|
||||
|
||||
// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
|
||||
// for better performance.
|
||||
#define SUB_QUEUE_MAX_WORDS 5
|
||||
|
||||
#define MAX_DEPTH_MULTIPLIER 3
|
||||
|
||||
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
|
||||
|
|
|
@ -39,7 +39,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
|
|||
}
|
||||
}
|
||||
mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
|
||||
mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength);
|
||||
mWordsPriorityQueuePool = new WordsPriorityQueuePool(
|
||||
maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
|
||||
mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier,
|
||||
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
|
||||
mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives,
|
||||
|
@ -48,7 +49,7 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
|
|||
|
||||
Dictionary::~Dictionary() {
|
||||
delete mCorrection;
|
||||
delete mWordsPriorityQueue;
|
||||
delete mWordsPriorityQueuePool;
|
||||
delete mUnigramDictionary;
|
||||
delete mBigramDictionary;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "defines.h"
|
||||
#include "proximity_info.h"
|
||||
#include "unigram_dictionary.h"
|
||||
#include "words_priority_queue.h"
|
||||
#include "words_priority_queue_pool.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -34,7 +34,7 @@ public:
|
|||
|
||||
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
|
||||
int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) {
|
||||
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueue,
|
||||
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
||||
mCorrection, xcoordinates, ycoordinates, codes,
|
||||
codesSize, flags, outWords, frequencies);
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ private:
|
|||
const bool IS_LATEST_DICT_VERSION;
|
||||
UnigramDictionary *mUnigramDictionary;
|
||||
BigramDictionary *mBigramDictionary;
|
||||
WordsPriorityQueue *mWordsPriorityQueue;
|
||||
WordsPriorityQueuePool *mWordsPriorityQueuePool;
|
||||
Correction *mCorrection;
|
||||
};
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
|
||||
const int codesBufferSize, const int flags, const int *codesSrc,
|
||||
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
|
||||
WordsPriorityQueue *queue) {
|
||||
WordsPriorityQueuePool *queuePool) {
|
||||
|
||||
if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
|
||||
for (int i = 0; i < codesRemain; ++i) {
|
||||
|
@ -110,7 +110,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
||||
codesBuffer, codesBufferSize, flags,
|
||||
codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
|
||||
currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, queue);
|
||||
currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction,
|
||||
queuePool);
|
||||
|
||||
// Copy the second char of the digraph in place, then continue processing on
|
||||
// the remaining part of the word.
|
||||
|
@ -120,7 +121,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
||||
codesBuffer, codesBufferSize, flags,
|
||||
codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1,
|
||||
codesDest + i * MAX_PROXIMITY_CHARS, correction, queue);
|
||||
codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -137,27 +138,28 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
|
||||
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
||||
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction,
|
||||
queue);
|
||||
queuePool);
|
||||
}
|
||||
|
||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue,
|
||||
Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const int codesSize, const int flags, unsigned short *outWords, int *frequencies) {
|
||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||
WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||
unsigned short *outWords, int *frequencies) {
|
||||
|
||||
WordsPriorityQueue* masterQueue = queue;
|
||||
Correction* masterCorrection = correction;
|
||||
if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)
|
||||
{ // Incrementally tune the word and try all possibilities
|
||||
int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
|
||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
||||
codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, masterQueue);
|
||||
codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool);
|
||||
} else { // Normal processing
|
||||
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags,
|
||||
masterCorrection, masterQueue);
|
||||
masterCorrection, queuePool);
|
||||
}
|
||||
|
||||
PROF_START(20);
|
||||
const int suggestedWordsCount = masterQueue->outputSuggestions(frequencies, outWords);
|
||||
const int suggestedWordsCount =
|
||||
queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords);
|
||||
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("Returning %d words", suggestedWordsCount);
|
||||
|
@ -178,11 +180,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorit
|
|||
|
||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const int inputLength, const int flags, Correction *correction, WordsPriorityQueue *queue) {
|
||||
const int inputLength, const int flags, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool) {
|
||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
||||
|
||||
PROF_OPEN;
|
||||
PROF_START(0);
|
||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, queue);
|
||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue);
|
||||
if (DEBUG_DICT) assert(codesSize == inputLength);
|
||||
|
||||
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||
|
@ -192,7 +196,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
|||
const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
|
||||
// TODO: remove
|
||||
PROF_START(1);
|
||||
getSuggestionCandidates(useFullEditDistance, inputLength, correction, queue);
|
||||
getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue);
|
||||
PROF_END(1);
|
||||
|
||||
PROF_START(2);
|
||||
|
@ -216,7 +220,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
|||
LOGI("--- Suggest missing space characters %d", i);
|
||||
}
|
||||
getMissingSpaceWords(
|
||||
inputLength, i, proximityInfo, correction, useFullEditDistance, queue);
|
||||
inputLength, i, proximityInfo, correction, useFullEditDistance, queuePool);
|
||||
}
|
||||
}
|
||||
PROF_END(5);
|
||||
|
@ -235,8 +239,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
|||
i, x, y, proximityInfo->hasSpaceProximity(x, y));
|
||||
}
|
||||
if (proximityInfo->hasSpaceProximity(x, y)) {
|
||||
getMistypedSpaceWords(
|
||||
inputLength, i, proximityInfo, correction, useFullEditDistance, queue);
|
||||
getMistypedSpaceWords(inputLength, i, proximityInfo, correction,
|
||||
useFullEditDistance, queuePool);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -293,20 +297,20 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
|||
|
||||
void UnigramDictionary::getMissingSpaceWords(
|
||||
const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo,
|
||||
Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
|
||||
Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) {
|
||||
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
||||
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
|
||||
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
|
||||
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
|
||||
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool);
|
||||
}
|
||||
|
||||
void UnigramDictionary::getMistypedSpaceWords(
|
||||
const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo,
|
||||
Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
|
||||
Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) {
|
||||
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
|
||||
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
|
||||
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
|
||||
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
|
||||
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool);
|
||||
}
|
||||
|
||||
inline void UnigramDictionary::onTerminal(
|
||||
|
@ -321,7 +325,9 @@ inline void UnigramDictionary::onTerminal(
|
|||
|
||||
void UnigramDictionary::getSplitTwoWordsSuggestion(
|
||||
const int inputLength, ProximityInfo *proximityInfo, Correction *correction,
|
||||
WordsPriorityQueue *queue) {
|
||||
WordsPriorityQueuePool *queuePool) {
|
||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
||||
|
||||
const int spaceProximityPos = correction->getSpaceProximityPos();
|
||||
const int missingSpacePos = correction->getMissingSpacePos();
|
||||
if (DEBUG_DICT) {
|
||||
|
@ -373,7 +379,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
|
|||
if (DEBUG_DICT) {
|
||||
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
|
||||
}
|
||||
addWord(word, newWordLength, pairFreq, queue);
|
||||
addWord(word, newWordLength, pairFreq, masterQueue);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "defines.h"
|
||||
#include "proximity_info.h"
|
||||
#include "words_priority_queue.h"
|
||||
#include "words_priority_queue_pool.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -70,7 +71,7 @@ public:
|
|||
const bool isLatestDictVersion);
|
||||
bool isValidWord(const uint16_t* const inWord, const int length) const;
|
||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue,
|
||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
||||
Correction *correction, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||
unsigned short *outWords, int *frequencies);
|
||||
|
@ -80,13 +81,13 @@ private:
|
|||
|
||||
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int inputLength,
|
||||
const int flags, Correction *correction, WordsPriorityQueue *queue);
|
||||
const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
|
||||
bool isDigraph(const int *codes, const int i, const int codesSize) const;
|
||||
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||
const int codesBufferSize, const int flags, const int* codesSrc,
|
||||
const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
|
||||
WordsPriorityQueue* queue);
|
||||
WordsPriorityQueuePool* queuePool);
|
||||
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
WordsPriorityQueue *queue);
|
||||
|
@ -94,13 +95,13 @@ private:
|
|||
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
||||
WordsPriorityQueue* queue);
|
||||
void getSplitTwoWordsSuggestion(const int inputLength, ProximityInfo *proximityInfo,
|
||||
Correction *correction, WordsPriorityQueue *queue);
|
||||
Correction *correction, WordsPriorityQueuePool *queuePool);
|
||||
void getMissingSpaceWords(const int inputLength, const int missingSpacePos,
|
||||
ProximityInfo *proximityInfo, Correction *correction,
|
||||
const bool useFullEditDistance, WordsPriorityQueue *queue);
|
||||
const bool useFullEditDistance, WordsPriorityQueuePool *queuePool);
|
||||
void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos,
|
||||
ProximityInfo *proximityInfo, Correction *correction,
|
||||
const bool useFullEditDistance, WordsPriorityQueue *queue);
|
||||
const bool useFullEditDistance, WordsPriorityQueuePool *queuePool);
|
||||
void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue);
|
||||
bool needsToSkipCurrentNode(const unsigned short c,
|
||||
const int inputIndex, const int skipPos, const int depth);
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
namespace latinime {
|
||||
|
||||
class WordsPriorityQueue {
|
||||
private:
|
||||
public:
|
||||
class SuggestedWord {
|
||||
public:
|
||||
int mScore;
|
||||
|
@ -40,31 +40,6 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
struct wordComparator {
|
||||
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
|
||||
return left->mScore > right->mScore;
|
||||
}
|
||||
};
|
||||
|
||||
SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
|
||||
int wordLength) {
|
||||
for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
|
||||
if (!mSuggestedWords[i].mUsed) {
|
||||
mSuggestedWords[i].setParams(score, word, wordLength);
|
||||
return &mSuggestedWords[i];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
|
||||
wordComparator> Suggestions;
|
||||
Suggestions mSuggestions;
|
||||
const unsigned int MAX_WORDS;
|
||||
const unsigned int MAX_WORD_LENGTH;
|
||||
SuggestedWord* mSuggestedWords;
|
||||
|
||||
public:
|
||||
WordsPriorityQueue(int maxWords, int maxWordLength) :
|
||||
MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH(
|
||||
(unsigned int) maxWordLength) {
|
||||
|
@ -105,6 +80,13 @@ public:
|
|||
mSuggestions.push(sw);
|
||||
}
|
||||
|
||||
// Removes the current top entry from mSuggestions and returns it.
// Returns 0 without touching the queue when it holds no entries.
SuggestedWord* topAndPop() {
    SuggestedWord* popped = 0;
    if (!mSuggestions.empty()) {
        popped = mSuggestions.top();
        mSuggestions.pop();
    }
    return popped;
}
|
||||
|
||||
int outputSuggestions(int *frequencies, unsigned short *outputChars) {
|
||||
const unsigned int size = min(MAX_WORDS, mSuggestions.size());
|
||||
int index = size - 1;
|
||||
|
@ -140,6 +122,30 @@ public:
|
|||
mSuggestions.pop();
|
||||
}
|
||||
}
|
||||
private:
|
||||
struct wordComparator {
|
||||
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
|
||||
return left->mScore > right->mScore;
|
||||
}
|
||||
};
|
||||
|
||||
// Scans mSuggestedWords from the front for the first slot whose mUsed flag is
// not set, hands the score and word to that slot through setParams(), and
// returns a pointer to it. Returns 0 when every scanned slot is in use.
// NOTE(review): the scan is bounded by MAX_WORD_LENGTH rather than MAX_WORDS --
// confirm this matches the size mSuggestedWords is allocated with.
SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
        int wordLength) {
    unsigned int slot = 0;
    while (slot < MAX_WORD_LENGTH) {
        SuggestedWord* candidate = &mSuggestedWords[slot];
        if (candidate->mUsed) {
            ++slot;
            continue;
        }
        candidate->setParams(score, word, wordLength);
        return candidate;
    }
    return 0;
}
|
||||
|
||||
typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
|
||||
wordComparator> Suggestions;
|
||||
Suggestions mSuggestions;
|
||||
const unsigned int MAX_WORDS;
|
||||
const unsigned int MAX_WORD_LENGTH;
|
||||
SuggestedWord* mSuggestedWords;
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (C) 2011 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
|
||||
#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
|
||||
|
||||
#include "words_priority_queue.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class WordsPriorityQueuePool {
|
||||
public:
|
||||
WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
|
||||
mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
|
||||
mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
|
||||
mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
|
||||
}
|
||||
|
||||
~WordsPriorityQueuePool() {
|
||||
delete mMasterQueue;
|
||||
}
|
||||
|
||||
WordsPriorityQueue* getMasterQueue() {
|
||||
return mMasterQueue;
|
||||
}
|
||||
// TODO: Come up with more generic pool
|
||||
WordsPriorityQueue* getSubQueue1() {
|
||||
return mSubQueue1;
|
||||
}
|
||||
WordsPriorityQueue* getSubQueue2() {
|
||||
return mSubQueue2;
|
||||
}
|
||||
private:
|
||||
WordsPriorityQueue *mMasterQueue;
|
||||
WordsPriorityQueue *mSubQueue1;
|
||||
WordsPriorityQueue *mSubQueue2;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
|
Loading…
Reference in New Issue