Merge "(Step 1) Move proximity related parameters from unigram_dictionary to proximity_info"

main
satok 2011-07-13 21:30:30 -07:00 committed by Android (Google) Code Review
commit 46f2d44a29
4 changed files with 51 additions and 31 deletions

View File

@ -63,4 +63,29 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
return false;
}
// TODO: Calculate nearby codes here.
void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength) {
mInputCodes = inputCodes;
mInputLength = inputLength;
}
const int* ProximityInfo::getProximityCharsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE);
}
bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if ((unsigned int) *inputCodes != (unsigned int) *word) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE;
word++;
}
return true;
}
} // namespace latinime

View File

@ -30,6 +30,9 @@ public:
const uint32_t *proximityCharsArray);
~ProximityInfo();
bool hasSpaceProximity(const int x, const int y) const;
void setInputParams(const int* inputCodes, const int inputLength);
const int* getProximityCharsAt(const int index) const;
bool sameAsTyped(const unsigned short *word, int length) const;
private:
int getStartIndexFromCoordinates(const int x, const int y) const;
const int MAX_PROXIMITY_CHARS_SIZE;
@ -39,7 +42,9 @@ private:
const int GRID_HEIGHT;
const int CELL_WIDTH;
const int CELL_HEIGHT;
const int *mInputCodes;
uint32_t *mProximityCharsArray;
int mInputLength;
};
} // namespace latinime

View File

@ -54,7 +54,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
// TODO : remove this variable.
ROOT_POS(0),
#endif // NEW_DICTIONARY_FORMAT
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
if (DEBUG_DICT) {
LOGI("UnigramDictionary - constructor");
@ -93,7 +93,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
// codesDest is the current point in the work buffer.
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
// codesRemain is the remaining size in codesSrc.
void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
@ -143,7 +143,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *pr
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
}
int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies) {
@ -187,13 +187,14 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
return suggestedWordsCount;
}
void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies) {
PROF_OPEN;
PROF_START(0);
initSuggestions(codes, codesSize, outWords, frequencies);
initSuggestions(
proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
if (DEBUG_DICT) assert(codesSize == mInputLength);
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
@ -275,16 +276,18 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
PROF_END(6);
}
void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies) {
if (DEBUG_DICT) {
LOGI("initSuggest");
}
mFrequencies = frequencies;
mOutputChars = outWords;
mInputCodes = codes;
mInputLength = codesSize;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
proximityInfo->setInputParams(codes, codesSize);
mProximityInfo = proximityInfo;
}
static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
@ -360,21 +363,6 @@ static inline unsigned short toBaseLowerCase(unsigned short c) {
return c;
}
bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if ((unsigned int) *inputCodes != (unsigned int) *word) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS;
word++;
}
return true;
}
static const char QUOTE = '\'';
static const char SPACE = ' ';
@ -569,6 +557,8 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
if (excessivePos >= 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
// If an excessive character is not adjacent to the left char or the right char,
// we will demote this word.
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
}
}
@ -678,7 +668,7 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
int* nextLetters, const int nextLettersSize) {
const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
if (isSameAsTyped) return;
if (depth >= MIN_SUGGEST_DEPTH) {

View File

@ -82,26 +82,26 @@ public:
int maxAlternatives);
#endif // NEW_DICTIONARY_FORMAT
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies);
~UnigramDictionary();
private:
void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies);
bool isDigraph(const int* codes, const int i, const int codesSize) const;
void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords,
int *frequencies);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies);
void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos, int *nextLetters, const int nextLettersSize,
const int maxDepth);
bool sameAsTyped(const unsigned short *word, int length) const;
bool addWord(unsigned short *word, int length, int frequency);
bool getSplitTwoWordsSuggestion(const int inputLength,
const int firstWordStartPos, const int firstWordLength,
@ -129,7 +129,7 @@ private:
int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
inline const int* getInputCharsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS);
return mProximityInfo->getProximityCharsAt(index);
}
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
unsigned short *word);
@ -174,7 +174,7 @@ private:
int *mFrequencies;
unsigned short *mOutputChars;
const int *mInputCodes;
const ProximityInfo *mProximityInfo;
int mInputLength;
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];