parent
1ec1be46b8
commit
5b0761e6a9
|
@ -26,11 +26,8 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
|
BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
|
||||||
const bool isLatestDictVersion, const bool hasBigram,
|
|
||||||
Dictionary *parentDictionary)
|
Dictionary *parentDictionary)
|
||||||
: DICT(dict), MAX_WORD_LENGTH(maxWordLength),
|
: DICT(dict), MAX_WORD_LENGTH(maxWordLength), mParentDictionary(parentDictionary) {
|
||||||
IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
|
||||||
HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) {
|
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("BigramDictionary - constructor");
|
AKLOGI("BigramDictionary - constructor");
|
||||||
AKLOGI("Has Bigram : %d", hasBigram);
|
AKLOGI("Has Bigram : %d", hasBigram);
|
||||||
|
|
|
@ -22,8 +22,7 @@ namespace latinime {
|
||||||
class Dictionary;
|
class Dictionary;
|
||||||
class BigramDictionary {
|
class BigramDictionary {
|
||||||
public:
|
public:
|
||||||
BigramDictionary(const unsigned char *dict, int maxWordLength,
|
BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary);
|
||||||
const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary);
|
|
||||||
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
|
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
|
||||||
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
||||||
~BigramDictionary();
|
~BigramDictionary();
|
||||||
|
@ -40,8 +39,6 @@ class BigramDictionary {
|
||||||
const int MAX_WORD_LENGTH;
|
const int MAX_WORD_LENGTH;
|
||||||
// TODO: Re-implement proximity correction for bigram correction
|
// TODO: Re-implement proximity correction for bigram correction
|
||||||
static const int MAX_ALTERNATIVES = 1;
|
static const int MAX_ALTERNATIVES = 1;
|
||||||
const bool IS_LATEST_DICT_VERSION;
|
|
||||||
const bool HAS_BIGRAM;
|
|
||||||
|
|
||||||
Dictionary *mParentDictionary;
|
Dictionary *mParentDictionary;
|
||||||
int *mBigramFreq;
|
int *mBigramFreq;
|
||||||
|
|
|
@ -81,7 +81,7 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
|
||||||
// Format 2 header is as follows:
|
// Format 2 header is as follows:
|
||||||
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
|
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
|
||||||
// Version number (2 bytes) 0x00 0x02
|
// Version number (2 bytes) 0x00 0x02
|
||||||
// Options (2 bytes) must be 0x00 0x00
|
// Options (2 bytes)
|
||||||
// Header size (4 bytes) : integer, big endian
|
// Header size (4 bytes) : integer, big endian
|
||||||
return (dict[4] << 8) + dict[5];
|
return (dict[4] << 8) + dict[5];
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -29,9 +29,7 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
|
||||||
int typedLetterMultiplier, int fullWordMultiplier,
|
int typedLetterMultiplier, int fullWordMultiplier,
|
||||||
int maxWordLength, int maxWords)
|
int maxWordLength, int maxWords)
|
||||||
: mDict((unsigned char*) dict), mDictSize(dictSize),
|
: mDict((unsigned char*) dict), mDictSize(dictSize),
|
||||||
mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
|
mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust) {
|
||||||
// Checks whether it has the latest dictionary or the old dictionary
|
|
||||||
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) {
|
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
|
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
|
||||||
AKLOGI("Max word length (%d) is greater than %d",
|
AKLOGI("Max word length (%d) is greater than %d",
|
||||||
|
@ -44,9 +42,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
|
||||||
maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
|
maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
|
||||||
const unsigned int headerSize = BinaryFormat::getHeaderSize(mDict);
|
const unsigned int headerSize = BinaryFormat::getHeaderSize(mDict);
|
||||||
mUnigramDictionary = new UnigramDictionary(mDict + headerSize, typedLetterMultiplier,
|
mUnigramDictionary = new UnigramDictionary(mDict + headerSize, typedLetterMultiplier,
|
||||||
fullWordMultiplier, maxWordLength, maxWords, IS_LATEST_DICT_VERSION);
|
fullWordMultiplier, maxWordLength, maxWords);
|
||||||
mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength,
|
mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength, this);
|
||||||
IS_LATEST_DICT_VERSION, true /* hasBigram */, this);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Dictionary::~Dictionary() {
|
Dictionary::~Dictionary() {
|
||||||
|
|
|
@ -39,7 +39,6 @@ class Dictionary {
|
||||||
codesSize, flags, outWords, frequencies);
|
codesSize, flags, outWords, frequencies);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Call mBigramDictionary instead of mUnigramDictionary
|
|
||||||
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
|
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
|
||||||
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
|
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
|
||||||
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
||||||
|
@ -68,7 +67,6 @@ class Dictionary {
|
||||||
const int mMmapFd;
|
const int mMmapFd;
|
||||||
const int mDictBufAdjust;
|
const int mDictBufAdjust;
|
||||||
|
|
||||||
const bool IS_LATEST_DICT_VERSION;
|
|
||||||
UnigramDictionary *mUnigramDictionary;
|
UnigramDictionary *mUnigramDictionary;
|
||||||
BigramDictionary *mBigramDictionary;
|
BigramDictionary *mBigramDictionary;
|
||||||
WordsPriorityQueuePool *mWordsPriorityQueuePool;
|
WordsPriorityQueuePool *mWordsPriorityQueuePool;
|
||||||
|
|
|
@ -40,10 +40,8 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[
|
||||||
|
|
||||||
// TODO: check the header
|
// TODO: check the header
|
||||||
UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,
|
UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,
|
||||||
int fullWordMultiplier, int maxWordLength, int maxWords,
|
int fullWordMultiplier, int maxWordLength, int maxWords)
|
||||||
const bool isLatestDictVersion)
|
|
||||||
: DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
|
: DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
|
||||||
IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
|
||||||
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
|
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
|
||||||
// TODO : remove this variable.
|
// TODO : remove this variable.
|
||||||
ROOT_POS(0),
|
ROOT_POS(0),
|
||||||
|
|
|
@ -70,8 +70,7 @@ class UnigramDictionary {
|
||||||
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
|
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
|
||||||
|
|
||||||
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
||||||
int fullWordMultiplier, int maxWordLength, int maxWords,
|
int fullWordMultiplier, int maxWordLength, int maxWords);
|
||||||
const bool isLatestDictVersion);
|
|
||||||
bool isValidWord(const uint16_t* const inWord, const int length) const;
|
bool isValidWord(const uint16_t* const inWord, const int length) const;
|
||||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
||||||
|
@ -139,7 +138,6 @@ class UnigramDictionary {
|
||||||
const uint8_t* const DICT_ROOT;
|
const uint8_t* const DICT_ROOT;
|
||||||
const int MAX_WORD_LENGTH;
|
const int MAX_WORD_LENGTH;
|
||||||
const int MAX_WORDS;
|
const int MAX_WORDS;
|
||||||
const bool IS_LATEST_DICT_VERSION;
|
|
||||||
const int TYPED_LETTER_MULTIPLIER;
|
const int TYPED_LETTER_MULTIPLIER;
|
||||||
const int FULL_WORD_MULTIPLIER;
|
const int FULL_WORD_MULTIPLIER;
|
||||||
const int ROOT_POS;
|
const int ROOT_POS;
|
||||||
|
|
Loading…
Reference in New Issue