Merge "(Step 1) Move proximity related parameters from unigram_dictionary to proximity_info"
commit
46f2d44a29
|
@ -63,4 +63,29 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
// TODO: Calculate nearby codes here.
|
||||
// Records the input-code buffer and its length on this object.
// Only the pointer is stored; the buffer contents are not copied here.
void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength) {
    mInputLength = inputLength;
    mInputCodes = inputCodes;
}
|
||||
|
||||
const int* ProximityInfo::getProximityCharsAt(const int index) const {
|
||||
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE);
|
||||
}
|
||||
|
||||
bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const {
|
||||
if (length != mInputLength) {
|
||||
return false;
|
||||
}
|
||||
const int *inputCodes = mInputCodes;
|
||||
while (length--) {
|
||||
if ((unsigned int) *inputCodes != (unsigned int) *word) {
|
||||
return false;
|
||||
}
|
||||
inputCodes += MAX_PROXIMITY_CHARS_SIZE;
|
||||
word++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -30,6 +30,9 @@ public:
|
|||
const uint32_t *proximityCharsArray);
|
||||
~ProximityInfo();
|
||||
bool hasSpaceProximity(const int x, const int y) const;
|
||||
void setInputParams(const int* inputCodes, const int inputLength);
|
||||
const int* getProximityCharsAt(const int index) const;
|
||||
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||
private:
|
||||
int getStartIndexFromCoordinates(const int x, const int y) const;
|
||||
const int MAX_PROXIMITY_CHARS_SIZE;
|
||||
|
@ -39,7 +42,9 @@ private:
|
|||
const int GRID_HEIGHT;
|
||||
const int CELL_WIDTH;
|
||||
const int CELL_HEIGHT;
|
||||
const int *mInputCodes;
|
||||
uint32_t *mProximityCharsArray;
|
||||
int mInputLength;
|
||||
};
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -54,7 +54,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
|
|||
// TODO : remove this variable.
|
||||
ROOT_POS(0),
|
||||
#endif // NEW_DICTIONARY_FORMAT
|
||||
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),
|
||||
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
|
||||
MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("UnigramDictionary - constructor");
|
||||
|
@ -93,7 +93,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
|
|||
// codesDest is the current point in the work buffer.
|
||||
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
|
||||
// codesRemain is the remaining size in codesSrc.
|
||||
void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
|
||||
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
|
||||
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
|
||||
|
@ -143,7 +143,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *pr
|
|||
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
|
||||
}
|
||||
|
||||
int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||
unsigned short *outWords, int *frequencies) {
|
||||
|
||||
|
@ -187,13 +187,14 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
|
|||
return suggestedWordsCount;
|
||||
}
|
||||
|
||||
void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
|
||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
|
||||
unsigned short *outWords, int *frequencies) {
|
||||
|
||||
PROF_OPEN;
|
||||
PROF_START(0);
|
||||
initSuggestions(codes, codesSize, outWords, frequencies);
|
||||
initSuggestions(
|
||||
proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
|
||||
if (DEBUG_DICT) assert(codesSize == mInputLength);
|
||||
|
||||
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||
|
@ -275,16 +276,18 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
|
|||
PROF_END(6);
|
||||
}
|
||||
|
||||
void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
|
||||
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
unsigned short *outWords, int *frequencies) {
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("initSuggest");
|
||||
}
|
||||
mFrequencies = frequencies;
|
||||
mOutputChars = outWords;
|
||||
mInputCodes = codes;
|
||||
mInputLength = codesSize;
|
||||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||
proximityInfo->setInputParams(codes, codesSize);
|
||||
mProximityInfo = proximityInfo;
|
||||
}
|
||||
|
||||
static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
|
||||
|
@ -360,21 +363,6 @@ static inline unsigned short toBaseLowerCase(unsigned short c) {
|
|||
return c;
|
||||
}
|
||||
|
||||
bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
|
||||
if (length != mInputLength) {
|
||||
return false;
|
||||
}
|
||||
const int *inputCodes = mInputCodes;
|
||||
while (length--) {
|
||||
if ((unsigned int) *inputCodes != (unsigned int) *word) {
|
||||
return false;
|
||||
}
|
||||
inputCodes += MAX_PROXIMITY_CHARS;
|
||||
word++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char QUOTE = '\'';
|
||||
static const char SPACE = ' ';
|
||||
|
||||
|
@ -569,6 +557,8 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
|||
if (excessivePos >= 0) {
|
||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
|
||||
// If an excessive character is not adjacent to the left char or the right char,
|
||||
// we will demote this word.
|
||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
||||
}
|
||||
}
|
||||
|
@ -678,7 +668,7 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de
|
|||
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
|
||||
int* nextLetters, const int nextLettersSize) {
|
||||
|
||||
const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
|
||||
const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
|
||||
if (isSameAsTyped) return;
|
||||
|
||||
if (depth >= MIN_SUGGEST_DEPTH) {
|
||||
|
|
|
@ -82,26 +82,26 @@ public:
|
|||
int maxAlternatives);
|
||||
#endif // NEW_DICTIONARY_FORMAT
|
||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||
int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||
unsigned short *outWords, int *frequencies);
|
||||
~UnigramDictionary();
|
||||
|
||||
private:
|
||||
void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
unsigned short *outWords, int *frequencies);
|
||||
bool isDigraph(const int* codes, const int i, const int codesSize) const;
|
||||
void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
|
||||
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
|
||||
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
|
||||
void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords,
|
||||
int *frequencies);
|
||||
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
unsigned short *outWords, int *frequencies);
|
||||
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
||||
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
||||
const int maxDepth);
|
||||
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||
bool addWord(unsigned short *word, int length, int frequency);
|
||||
bool getSplitTwoWordsSuggestion(const int inputLength,
|
||||
const int firstWordStartPos, const int firstWordLength,
|
||||
|
@ -129,7 +129,7 @@ private:
|
|||
int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
|
||||
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
|
||||
inline const int* getInputCharsAt(const int index) const {
|
||||
return mInputCodes + (index * MAX_PROXIMITY_CHARS);
|
||||
return mProximityInfo->getProximityCharsAt(index);
|
||||
}
|
||||
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
||||
unsigned short *word);
|
||||
|
@ -174,7 +174,7 @@ private:
|
|||
|
||||
int *mFrequencies;
|
||||
unsigned short *mOutputChars;
|
||||
const int *mInputCodes;
|
||||
const ProximityInfo *mProximityInfo;
|
||||
int mInputLength;
|
||||
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
|
|
Loading…
Reference in New Issue