(Step 1) Move proximity related parameters from unigram_dictionary to proximity_info
Change-Id: Ic630b35f4abffeb84c38bcf5935795b7ff07556a
main
parent
6a6aad0758
commit
1d7eaf8462
|
@ -63,4 +63,29 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Calculate nearby codes here.
|
||||||
|
// Remembers the input code buffer and its length for later queries
// (getProximityCharsAt, sameAsTyped). Only the pointer is stored; no copy of
// the buffer is made here.
void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength) {
    mInputLength = inputLength;
    mInputCodes = inputCodes;
}
|
||||||
|
|
||||||
|
const int* ProximityInfo::getProximityCharsAt(const int index) const {
|
||||||
|
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const {
|
||||||
|
if (length != mInputLength) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int *inputCodes = mInputCodes;
|
||||||
|
while (length--) {
|
||||||
|
if ((unsigned int) *inputCodes != (unsigned int) *word) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
inputCodes += MAX_PROXIMITY_CHARS_SIZE;
|
||||||
|
word++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -30,6 +30,9 @@ public:
|
||||||
const uint32_t *proximityCharsArray);
|
const uint32_t *proximityCharsArray);
|
||||||
~ProximityInfo();
|
~ProximityInfo();
|
||||||
bool hasSpaceProximity(const int x, const int y) const;
|
bool hasSpaceProximity(const int x, const int y) const;
|
||||||
|
void setInputParams(const int* inputCodes, const int inputLength);
|
||||||
|
const int* getProximityCharsAt(const int index) const;
|
||||||
|
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||||
private:
|
private:
|
||||||
int getStartIndexFromCoordinates(const int x, const int y) const;
|
int getStartIndexFromCoordinates(const int x, const int y) const;
|
||||||
const int MAX_PROXIMITY_CHARS_SIZE;
|
const int MAX_PROXIMITY_CHARS_SIZE;
|
||||||
|
@ -39,7 +42,9 @@ private:
|
||||||
const int GRID_HEIGHT;
|
const int GRID_HEIGHT;
|
||||||
const int CELL_WIDTH;
|
const int CELL_WIDTH;
|
||||||
const int CELL_HEIGHT;
|
const int CELL_HEIGHT;
|
||||||
|
const int *mInputCodes;
|
||||||
uint32_t *mProximityCharsArray;
|
uint32_t *mProximityCharsArray;
|
||||||
|
int mInputLength;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -54,7 +54,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
|
||||||
// TODO : remove this variable.
|
// TODO : remove this variable.
|
||||||
ROOT_POS(0),
|
ROOT_POS(0),
|
||||||
#endif // NEW_DICTIONARY_FORMAT
|
#endif // NEW_DICTIONARY_FORMAT
|
||||||
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),
|
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
|
||||||
MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
|
MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("UnigramDictionary - constructor");
|
LOGI("UnigramDictionary - constructor");
|
||||||
|
@ -93,7 +93,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
|
||||||
// codesDest is the current point in the work buffer.
|
// codesDest is the current point in the work buffer.
|
||||||
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
|
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
|
||||||
// codesRemain is the remaining size in codesSrc.
|
// codesRemain is the remaining size in codesSrc.
|
||||||
void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
|
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||||
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
|
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
|
||||||
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
|
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
|
||||||
|
@ -143,7 +143,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *pr
|
||||||
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
|
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
|
||||||
}
|
}
|
||||||
|
|
||||||
int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||||
unsigned short *outWords, int *frequencies) {
|
unsigned short *outWords, int *frequencies) {
|
||||||
|
|
||||||
|
@ -187,13 +187,14 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
|
||||||
return suggestedWordsCount;
|
return suggestedWordsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
|
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
|
const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
unsigned short *outWords, int *frequencies) {
|
unsigned short *outWords, int *frequencies) {
|
||||||
|
|
||||||
PROF_OPEN;
|
PROF_OPEN;
|
||||||
PROF_START(0);
|
PROF_START(0);
|
||||||
initSuggestions(codes, codesSize, outWords, frequencies);
|
initSuggestions(
|
||||||
|
proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
|
||||||
if (DEBUG_DICT) assert(codesSize == mInputLength);
|
if (DEBUG_DICT) assert(codesSize == mInputLength);
|
||||||
|
|
||||||
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
|
@ -275,16 +276,18 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
|
||||||
PROF_END(6);
|
PROF_END(6);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
|
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
unsigned short *outWords, int *frequencies) {
|
unsigned short *outWords, int *frequencies) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
LOGI("initSuggest");
|
LOGI("initSuggest");
|
||||||
}
|
}
|
||||||
mFrequencies = frequencies;
|
mFrequencies = frequencies;
|
||||||
mOutputChars = outWords;
|
mOutputChars = outWords;
|
||||||
mInputCodes = codes;
|
|
||||||
mInputLength = codesSize;
|
mInputLength = codesSize;
|
||||||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||||
|
proximityInfo->setInputParams(codes, codesSize);
|
||||||
|
mProximityInfo = proximityInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
|
static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
|
||||||
|
@ -360,21 +363,6 @@ static inline unsigned short toBaseLowerCase(unsigned short c) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
|
|
||||||
if (length != mInputLength) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int *inputCodes = mInputCodes;
|
|
||||||
while (length--) {
|
|
||||||
if ((unsigned int) *inputCodes != (unsigned int) *word) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
inputCodes += MAX_PROXIMITY_CHARS;
|
|
||||||
word++;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
static const char SPACE = ' ';
|
static const char SPACE = ' ';
|
||||||
|
|
||||||
|
@ -569,6 +557,8 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
if (excessivePos >= 0) {
|
if (excessivePos >= 0) {
|
||||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||||
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
|
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
|
||||||
|
// If an excessive character is not adjacent to the left char or the right char,
|
||||||
|
// we will demote this word.
|
||||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -678,7 +668,7 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de
|
||||||
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
|
||||||
int* nextLetters, const int nextLettersSize) {
|
int* nextLetters, const int nextLettersSize) {
|
||||||
|
|
||||||
const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
|
const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
|
||||||
if (isSameAsTyped) return;
|
if (isSameAsTyped) return;
|
||||||
|
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) {
|
if (depth >= MIN_SUGGEST_DEPTH) {
|
||||||
|
|
|
@ -82,26 +82,26 @@ public:
|
||||||
int maxAlternatives);
|
int maxAlternatives);
|
||||||
#endif // NEW_DICTIONARY_FORMAT
|
#endif // NEW_DICTIONARY_FORMAT
|
||||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||||
int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||||
unsigned short *outWords, int *frequencies);
|
unsigned short *outWords, int *frequencies);
|
||||||
~UnigramDictionary();
|
~UnigramDictionary();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize,
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
unsigned short *outWords, int *frequencies);
|
unsigned short *outWords, int *frequencies);
|
||||||
bool isDigraph(const int* codes, const int i, const int codesSize) const;
|
bool isDigraph(const int* codes, const int i, const int codesSize) const;
|
||||||
void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
|
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||||
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
|
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
|
||||||
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
|
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
|
||||||
void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords,
|
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
int *frequencies);
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
|
unsigned short *outWords, int *frequencies);
|
||||||
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
||||||
const int maxDepth);
|
const int maxDepth);
|
||||||
bool sameAsTyped(const unsigned short *word, int length) const;
|
|
||||||
bool addWord(unsigned short *word, int length, int frequency);
|
bool addWord(unsigned short *word, int length, int frequency);
|
||||||
bool getSplitTwoWordsSuggestion(const int inputLength,
|
bool getSplitTwoWordsSuggestion(const int inputLength,
|
||||||
const int firstWordStartPos, const int firstWordLength,
|
const int firstWordStartPos, const int firstWordLength,
|
||||||
|
@ -129,7 +129,7 @@ private:
|
||||||
int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
|
int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
|
||||||
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
|
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
|
||||||
inline const int* getInputCharsAt(const int index) const {
|
inline const int* getInputCharsAt(const int index) const {
|
||||||
return mInputCodes + (index * MAX_PROXIMITY_CHARS);
|
return mProximityInfo->getProximityCharsAt(index);
|
||||||
}
|
}
|
||||||
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
||||||
unsigned short *word);
|
unsigned short *word);
|
||||||
|
@ -174,7 +174,7 @@ private:
|
||||||
|
|
||||||
int *mFrequencies;
|
int *mFrequencies;
|
||||||
unsigned short *mOutputChars;
|
unsigned short *mOutputChars;
|
||||||
const int *mInputCodes;
|
const ProximityInfo *mProximityInfo;
|
||||||
int mInputLength;
|
int mInputLength;
|
||||||
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
|
Loading…
Reference in New Issue