/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
|
|
|
|
#define LATINIME_UNIGRAM_DICTIONARY_H
|
|
|
|
|
2012-05-07 07:28:30 +00:00
|
|
|
#include <map>
|
2011-06-16 11:55:16 +00:00
|
|
|
#include <stdint.h>
|
2010-12-02 05:53:24 +00:00
|
|
|
#include "defines.h"
|
2013-04-03 00:21:08 +00:00
|
|
|
#include "digraph_utils.h"
|
2010-12-01 12:22:15 +00:00
|
|
|
|
2010-12-02 05:53:24 +00:00
|
|
|
namespace latinime {
|
2010-12-01 12:22:15 +00:00
|
|
|
|
2012-07-31 08:56:40 +00:00
|
|
|
// Forward declarations. UnigramDictionary's member declarations use these types only through
// pointers or references, so incomplete types are sufficient in this header.
class Correction;
class ProximityInfo;
class TerminalAttributes;
class WordsPriorityQueuePool;
|
|
|
|
|
2010-12-01 12:22:15 +00:00
|
|
|
class UnigramDictionary {
|
2012-01-06 03:24:38 +00:00
|
|
|
public:
|
2011-12-15 05:53:19 +00:00
|
|
|
// Error tolerances
|
|
|
|
static const int DEFAULT_MAX_ERRORS = 2;
|
|
|
|
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
|
|
|
|
|
2012-05-30 08:28:34 +00:00
|
|
|
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
|
|
|
|
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
|
|
|
|
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
|
2013-04-03 00:23:13 +00:00
|
|
|
UnigramDictionary(const uint8_t *const streamStart, const unsigned int dictFlags);
|
2013-03-18 04:08:31 +00:00
|
|
|
int getProbability(const int *const inWord, const int length) const;
|
2012-10-29 09:06:22 +00:00
|
|
|
int getBigramPosition(int pos, int *word, int offset, int length) const;
|
2012-09-04 03:49:46 +00:00
|
|
|
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
2013-01-11 16:18:00 +00:00
|
|
|
const int *ycoordinates, const int *inputCodePoints, const int inputSize,
|
2012-09-04 03:49:46 +00:00
|
|
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
2012-10-29 09:06:22 +00:00
|
|
|
const bool useFullEditDistance, int *outWords, int *frequencies,
|
2012-09-04 03:49:46 +00:00
|
|
|
int *outputTypes) const;
|
2013-04-03 00:23:13 +00:00
|
|
|
int getDictFlags() const { return DICT_FLAGS; }
|
2011-07-15 04:49:00 +00:00
|
|
|
virtual ~UnigramDictionary();
|
2010-12-01 12:22:15 +00:00
|
|
|
|
2012-01-06 03:24:38 +00:00
|
|
|
private:
|
2012-06-14 18:25:50 +00:00
|
|
|
DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary);
|
2011-07-13 01:32:02 +00:00
|
|
|
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
2013-01-11 16:18:00 +00:00
|
|
|
const int *ycoordinates, const int *inputCodePoints, const int inputSize,
|
2012-05-07 07:28:30 +00:00
|
|
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
|
|
|
const bool useFullEditDistance, Correction *correction,
|
2012-06-08 10:52:19 +00:00
|
|
|
WordsPriorityQueuePool *queuePool) const;
|
2013-01-11 16:18:00 +00:00
|
|
|
int getDigraphReplacement(const int *codes, const int i, const int inputSize,
|
2013-04-03 00:21:08 +00:00
|
|
|
const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const;
|
2013-01-08 08:23:43 +00:00
|
|
|
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
|
|
const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer,
|
|
|
|
int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap,
|
|
|
|
const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc,
|
|
|
|
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
|
2013-04-03 00:21:08 +00:00
|
|
|
WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs,
|
2013-01-08 08:23:43 +00:00
|
|
|
const unsigned int digraphsSize) const;
|
2011-07-13 01:32:02 +00:00
|
|
|
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
2013-01-11 16:18:00 +00:00
|
|
|
const int *ycoordinates, const int *codes, const int inputSize,
|
2012-06-08 10:52:19 +00:00
|
|
|
Correction *correction) const;
|
2011-12-15 13:29:05 +00:00
|
|
|
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
2012-05-07 07:28:30 +00:00
|
|
|
const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
|
2012-08-23 06:46:43 +00:00
|
|
|
const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize,
|
2012-07-25 08:51:43 +00:00
|
|
|
Correction *correction, WordsPriorityQueuePool *queuePool) const;
|
2012-04-23 10:25:28 +00:00
|
|
|
void getSuggestionCandidates(
|
2012-08-23 06:46:43 +00:00
|
|
|
const bool useFullEditDistance, const int inputSize,
|
2012-05-07 07:28:30 +00:00
|
|
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
2012-07-25 08:51:43 +00:00
|
|
|
Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion,
|
2012-06-08 10:52:19 +00:00
|
|
|
const int maxErrors, const int currentWordIndex) const;
|
2013-01-08 08:23:43 +00:00
|
|
|
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
|
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
|
|
const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
|
2012-06-08 10:52:19 +00:00
|
|
|
const bool hasAutoCorrectionCandidate) const;
|
2013-01-08 08:23:43 +00:00
|
|
|
void onTerminal(const int freq, const TerminalAttributes &terminalAttributes,
|
2012-01-23 07:52:37 +00:00
|
|
|
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
|
2012-06-08 10:52:19 +00:00
|
|
|
const int currentWordIndex) const;
|
2010-12-08 08:05:39 +00:00
|
|
|
// Process a node by considering proximity, missing and excessive character
|
2012-05-07 07:28:30 +00:00
|
|
|
bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
|
|
|
|
const uint8_t *bigramFilter, Correction *correction, int *newCount,
|
|
|
|
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
2012-06-08 10:52:19 +00:00
|
|
|
const int currentWordIndex) const;
|
2013-03-18 04:08:31 +00:00
|
|
|
int getMostProbableWordLike(const int startInputIndex, const int inputSize,
|
2012-10-29 09:06:22 +00:00
|
|
|
Correction *correction, int *word) const;
|
2013-03-18 04:08:31 +00:00
|
|
|
int getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
|
2012-10-29 09:06:22 +00:00
|
|
|
int *outWord) const;
|
2013-01-08 08:23:43 +00:00
|
|
|
int getSubStringSuggestion(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
|
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
|
|
Correction *correction, WordsPriorityQueuePool *queuePool, const int inputSize,
|
2012-01-26 09:36:19 +00:00
|
|
|
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
2013-01-08 08:23:43 +00:00
|
|
|
const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos,
|
|
|
|
const bool isSpaceProximity, int *freqArray, int *wordLengthArray, int *outputWord,
|
|
|
|
int *outputWordLength) const;
|
2012-10-29 09:06:22 +00:00
|
|
|
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
|
|
|
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
|
|
|
const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
|
2012-01-30 04:53:58 +00:00
|
|
|
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
|
2012-07-25 08:51:43 +00:00
|
|
|
const int outputWordLength, int *freqArray, int *wordLengthArray,
|
2012-10-29 09:06:22 +00:00
|
|
|
int *outputWord) const;
|
2011-06-16 11:55:16 +00:00
|
|
|
|
2012-07-25 08:51:43 +00:00
|
|
|
const uint8_t *const DICT_ROOT;
|
2011-01-07 06:01:51 +00:00
|
|
|
const int ROOT_POS;
|
2012-03-06 10:54:03 +00:00
|
|
|
const int MAX_DIGRAPH_SEARCH_DEPTH;
|
2013-04-03 00:23:13 +00:00
|
|
|
const int DICT_FLAGS;
|
2010-12-01 12:22:15 +00:00
|
|
|
};
|
2011-06-18 04:09:55 +00:00
|
|
|
} // namespace latinime
|
2010-12-01 12:22:15 +00:00
|
|
|
#endif // LATINIME_UNIGRAM_DICTIONARY_H
|