2010-12-01 12:22:15 +00:00
|
|
|
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
|
|
|
|
#define LATINIME_UNIGRAM_DICTIONARY_H
|
|
|
|
|
2010-12-02 05:53:24 +00:00
|
|
|
#include "defines.h"
|
2010-12-01 12:22:15 +00:00
|
|
|
|
2010-12-02 05:53:24 +00:00
|
|
|
namespace latinime {
|
2010-12-01 12:22:15 +00:00
|
|
|
|
|
|
|
class UnigramDictionary {
|
|
|
|
public:
|
2010-12-02 05:53:24 +00:00
|
|
|
UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
|
2010-12-08 08:05:39 +00:00
|
|
|
int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
|
2010-12-01 12:22:15 +00:00
|
|
|
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
|
|
|
|
int *nextLetters, int nextLettersSize);
|
|
|
|
~UnigramDictionary();
|
|
|
|
|
|
|
|
private:
|
|
|
|
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
2010-12-13 05:42:35 +00:00
|
|
|
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
2010-12-09 13:08:33 +00:00
|
|
|
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
|
|
|
const int maxDepth);
|
2010-12-01 12:22:15 +00:00
|
|
|
void getVersionNumber();
|
|
|
|
bool checkIfDictVersionIsLatest();
|
|
|
|
int getAddress(int *pos);
|
|
|
|
int getFreq(int *pos);
|
|
|
|
int wideStrLen(unsigned short *str);
|
|
|
|
bool sameAsTyped(unsigned short *word, int length);
|
|
|
|
bool addWord(unsigned short *word, int length, int frequency);
|
|
|
|
unsigned short toLowerCase(unsigned short c);
|
2010-12-03 10:38:08 +00:00
|
|
|
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
|
|
|
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
2010-12-09 13:08:33 +00:00
|
|
|
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
|
|
|
|
const int nextLettersSize);
|
2010-12-08 08:05:39 +00:00
|
|
|
bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
|
2010-12-07 04:08:39 +00:00
|
|
|
// Keep getWordsOld for comparing performance between getWords and getWordsOld
|
|
|
|
void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
2010-12-09 13:08:33 +00:00
|
|
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
|
|
|
const int nextLettersSize);
|
2010-12-01 12:22:15 +00:00
|
|
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
2010-12-13 05:42:35 +00:00
|
|
|
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
|
|
|
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
2010-12-02 11:19:59 +00:00
|
|
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
2010-12-13 05:42:35 +00:00
|
|
|
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
2010-12-09 13:08:33 +00:00
|
|
|
const int nextLettersSize, const int skipPos, const int excessivePos,
|
|
|
|
const int transposedPos, const int freq);
|
2010-12-13 05:42:35 +00:00
|
|
|
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
|
|
|
|
const int inputIndex, const int depth, const int snr, const int skipPos,
|
|
|
|
const int excessivePos, const int transposedPos, const int freq, const int addedWeight);
|
2010-12-03 07:39:16 +00:00
|
|
|
bool needsToSkipCurrentNode(const unsigned short c,
|
2010-12-03 10:38:08 +00:00
|
|
|
const int inputIndex, const int skipPos, const int depth);
|
2010-12-09 13:08:33 +00:00
|
|
|
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,
|
|
|
|
const int excessivePos, const int transposedPos);
|
2010-12-08 08:05:39 +00:00
|
|
|
// Process a node by considering proximity, missing and excessive character
|
2010-12-06 08:38:58 +00:00
|
|
|
bool processCurrentNode(const int pos, const int depth,
|
2010-12-08 07:04:16 +00:00
|
|
|
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
2010-12-09 13:08:33 +00:00
|
|
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
|
|
|
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
2010-12-08 07:04:16 +00:00
|
|
|
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
|
|
|
int *nextSiblingPosition);
|
2010-12-09 10:21:51 +00:00
|
|
|
int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
|
2010-12-08 08:05:39 +00:00
|
|
|
// Process a node by considering missing space
|
2010-12-09 10:21:51 +00:00
|
|
|
bool processCurrentNodeForExactMatch(const int firstChildPos,
|
|
|
|
const int startInputIndex, const int depth, unsigned short *word,
|
|
|
|
int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
|
2010-12-09 12:55:40 +00:00
|
|
|
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
|
|
|
|
int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);}
|
2010-12-02 05:53:24 +00:00
|
|
|
const unsigned char *DICT;
|
2010-12-01 12:22:15 +00:00
|
|
|
const int MAX_WORD_LENGTH;
|
2011-01-07 06:01:51 +00:00
|
|
|
const int MAX_WORDS;
|
2010-12-08 08:05:39 +00:00
|
|
|
const int MAX_PROXIMITY_CHARS;
|
2010-12-02 05:53:24 +00:00
|
|
|
const bool IS_LATEST_DICT_VERSION;
|
2010-12-02 09:11:54 +00:00
|
|
|
const int TYPED_LETTER_MULTIPLIER;
|
|
|
|
const int FULL_WORD_MULTIPLIER;
|
2011-01-07 06:01:51 +00:00
|
|
|
const int ROOT_POS;
|
2010-12-01 12:22:15 +00:00
|
|
|
|
|
|
|
int *mFrequencies;
|
|
|
|
unsigned short *mOutputChars;
|
|
|
|
int *mInputCodes;
|
|
|
|
int mInputLength;
|
2010-12-02 11:19:59 +00:00
|
|
|
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
|
|
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
2010-12-01 12:22:15 +00:00
|
|
|
int mMaxEditDistance;
|
2010-12-07 04:08:39 +00:00
|
|
|
|
|
|
|
int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];
|
|
|
|
bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
|
|
|
|
int mStackNodeFreq[MAX_WORD_LENGTH_INTERNAL];
|
|
|
|
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
|
|
|
|
int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
|
|
|
|
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
|
2010-12-01 12:22:15 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
}; // namespace latinime
|
|
|
|
|
|
|
|
#endif // LATINIME_UNIGRAM_DICTIONARY_H
|