LatinIME/native/jni/src/unigram_dictionary.h
Ken Wakasa f2789819bd Cosmetic fixes and a bug fix in UnigramDictionary::testCharGroupForContinuedLikeness().
This change has actually been extracted from a change work in progress I4fe423834b8131fb122251892c98228a6e08ba25

Change-Id: I52568fa09da2ea22be7f8bfe9676b7cd73c31fa4
2012-09-04 14:23:37 +09:00

129 lines
6.7 KiB
C++

/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
#define LATINIME_UNIGRAM_DICTIONARY_H
#include <map>
#include <stdint.h>
#include "defines.h"
namespace latinime {
class Correction;
class ProximityInfo;
class TerminalAttributes;
class WordsPriorityQueuePool;
class UnigramDictionary {
typedef struct { int first; int second; int replacement; } digraph_t;
public:
// Error tolerances
static const int DEFAULT_MAX_ERRORS = 2;
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
int getFrequency(const int32_t *const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
int *outputTypes) const;
virtual ~UnigramDictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary);
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool) const;
int getDigraphReplacement(const int *codes, const int i, const int codesSize,
const digraph_t *const digraphs, const unsigned int digraphsSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, const int *codesSrc, const int codesRemain,
const int currentDepth, int *codesDest, Correction *correction,
WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs,
const unsigned int digraphsSize) const;
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
Correction *correction) const;
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize,
Correction *correction, WordsPriorityQueuePool *queuePool) const;
void getSuggestionCandidates(
const bool useFullEditDistance, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion,
const int maxErrors, const int currentWordIndex) const;
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputSize,
Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate) const;
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex) const;
// Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
Correction *correction, unsigned short *word) const;
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
short unsigned int *outWord) const;
int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool, const int inputSize,
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputSize,
Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int *wordLengthArray,
unsigned short *outputWord) const;
const uint8_t *const DICT_ROOT;
const int MAX_WORD_LENGTH;
const int MAX_WORDS;
const int TYPED_LETTER_MULTIPLIER;
const int FULL_WORD_MULTIPLIER;
const int ROOT_POS;
const unsigned int BYTES_IN_ONE_CHAR;
const int MAX_DIGRAPH_SEARCH_DEPTH;
const int FLAGS;
static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_DICTIONARY_H