Merge "New dict format, step 4"

main
Jean Chalard 2011-06-17 05:30:26 -07:00 committed by Android (Google) Code Review
commit 23eb0fa0b5
2 changed files with 49 additions and 38 deletions

View File

@ -272,6 +272,7 @@ static inline void registerNextLetter(unsigned short c, int *nextLetters, int ne
} }
// TODO: We need to optimize addWord by using STL or something // TODO: We need to optimize addWord by using STL or something
// TODO: This needs to take a const unsigned short* and not tinker with its contents
bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
word[length] = 0; word[length] = 0;
if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
@ -321,6 +322,16 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
return false; return false;
} }
// Placeholder hook for adding the alternate spellings of a word to the suggestions.
// The body is intentionally empty: none of the parameters (root, pos, depth,
// finalFreq) is read yet, so calling this currently has no effect.
inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos,
int depth, int finalFreq) {
// TODO: actually add alternates when the format supports it.
}
// Tells whether a node's flags byte announces alternate spellings.
// For now the answer is unconditionally "no": the flags value is not inspected.
static inline bool hasAlternateSpellings(uint8_t flags) {
    // TODO: when the format supports it, return the actual value.
    static_cast<void>(flags);  // Deliberately ignored until the format carries this bit.
    const bool alternatesPresent = false;
    return alternatesPresent;
}
static inline unsigned short toBaseLowerCase(unsigned short c) { static inline unsigned short toBaseLowerCase(unsigned short c) {
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
c = BASE_CHARS[c]; c = BASE_CHARS[c];
@ -333,7 +344,7 @@ static inline unsigned short toBaseLowerCase(unsigned short c) {
return c; return c;
} }
bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) { bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) { if (length != mInputLength) {
return false; return false;
} }
@ -656,28 +667,6 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
return finalFreq; return finalFreq;
} }
// Terminal-node handler for the non-same-length path.
// Scores the candidate with calculateFinalFreq() (sameLength = false), offers the
// word of length depth + 1 to addWord() once depth reaches MIN_SUGGEST_DEPTH, and,
// when no character was skipped and depth has reached mInputLength, records
// mWord[mInputLength] through registerNextLetter().
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
        unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
        int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
        const int transposedPos, const int freq) {
    const int wordLength = depth + 1;
    // The score is computed unconditionally, exactly once, before any word is added.
    const int finalFreq = calculateFinalFreq(
            inputIndex, depth, matchWeight, skipPos, excessivePos, transposedPos, freq,
            false /* sameLength */);
    if (depth >= MIN_SUGGEST_DEPTH) {
        addWord(word, wordLength, finalFreq);
    }

    const bool skippedSomething = !(skipPos < 0);
    if (skippedSomething || depth < mInputLength) {
        return;
    }
    registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
}
// Terminal-node handler for the same-length path.
// Does nothing when sameAsTyped() reports that the candidate matches the typed
// input. Otherwise scores the candidate with calculateFinalFreq() (sameLength = true)
// and offers the word of length depth + 1 to addWord() once depth reaches
// MIN_SUGGEST_DEPTH.
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
        unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
        const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
    const int wordLength = depth + 1;
    if (!sameAsTyped(word, wordLength)) {
        const int finalFreq = calculateFinalFreq(
                inputIndex, depth, matchWeight, skipPos, excessivePos, transposedPos, freq,
                true /* sameLength */);
        // Proximity collection will promote a word of the same length as what user typed.
        if (depth >= MIN_SUGGEST_DEPTH) {
            addWord(word, wordLength, finalFreq);
        }
    }
}
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth) { const int inputIndex, const int skipPos, const int depth) {
const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0]; const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
@ -708,7 +697,6 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
return false; return false;
} }
// In the following function, c is the current character of the dictionary word // In the following function, c is the current character of the dictionary word
// currently examined. // currently examined.
// currentChars is an array containing the keys close to the character the // currentChars is an array containing the keys close to the character the
@ -751,6 +739,30 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId
return UNRELATED_CHAR; return UNRELATED_CHAR;
} }
// Handles a terminal node met during dictionary traversal.
//
// word            buffer holding the candidate; its first depth + 1 entries are the
//                 word (addWord() writes a terminator at word[depth + 1]).
// depth           index of the last character of the candidate in `word`.
// root, pos       forwarded to addWordAlternatesSpellings().
// flags           node flags, only consulted through hasAlternateSpellings().
// inputIndex, matchWeight, skipPos, excessivePos, transposedPos, freq
//                 forwarded to calculateFinalFreq() to score the candidate.
// sameLength      selects the same-length behaviour: the candidate is compared with
//                 the typed input, and the value is passed on to calculateFinalFreq().
// nextLetters, nextLettersSize
//                 forwarded to registerNextLetter().
//
// A candidate identical to the typed input is never passed to addWord(); it is only
// processed further if it has alternate spellings.
inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
        const uint8_t* const root, const uint8_t flags, int pos,
        const int inputIndex, const int matchWeight, const int skipPos,
        const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
        int* nextLetters, const int nextLettersSize) {
    // sameAsTyped() is only consulted on the same-length path.
    const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
    const bool hasAlternates = hasAlternateSpellings(flags);
    if (isSameAsTyped && !hasAlternates) return;

    if (depth >= MIN_SUGGEST_DEPTH) {
        const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
                excessivePos, transposedPos, freq, sameLength);
        if (!isSameAsTyped) {
            addWord(word, depth + 1, finalFreq);
        }
        if (hasAlternates) {
            // The third parameter of addWordAlternatesSpellings() is the depth, so pass
            // `depth` (not `flags`), and use the `root` handed to this function.
            addWordAlternatesSpellings(root, pos, depth, finalFreq);
        }
    }

    // NOTE(review): before the two per-case terminal handlers were merged into this
    // function, the next letter was registered only on the longer-than-input path
    // (i.e. when sameLength is false). Confirm that requiring `sameLength` here is
    // intended rather than an inverted condition.
    if (sameLength && depth >= mInputLength && skipPos < 0) {
        registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);
    }
}
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex, const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
const int diffs, const int skipPos, const int excessivePos, const int transposedPos, const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
@ -770,6 +782,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
int freq; int freq;
bool isSameAsUserTypedLength = false; bool isSameAsUserTypedLength = false;
const uint8_t flags = 0; // No flags for now
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos, *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
@ -782,9 +796,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
mWord[depth] = c; mWord[depth] = c;
if (traverseAllNodes && terminal) { if (traverseAllNodes && terminal) {
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth, onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
freq);
} }
if (!needsToTraverseChildrenNodes) return false; if (!needsToTraverseChildrenNodes) return false;
*newTraverseAllNodes = traverseAllNodes; *newTraverseAllNodes = traverseAllNodes;
@ -811,8 +824,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
if (isSameAsUserTypedLength && terminal) { if (isSameAsUserTypedLength && terminal) {
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight, onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
skipPos, excessivePos, transposedPos, freq); excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
} }
if (!needsToTraverseChildrenNodes) return false; if (!needsToTraverseChildrenNodes) return false;
// Start traversing all nodes after the index exceeds the user typed length // Start traversing all nodes after the index exceeds the user typed length

View File

@ -64,9 +64,9 @@ private:
bool checkIfDictVersionIsLatest(); bool checkIfDictVersionIsLatest();
int getAddress(int *pos); int getAddress(int *pos);
int getFreq(int *pos); int getFreq(int *pos);
int wideStrLen(unsigned short *str); bool sameAsTyped(const unsigned short *word, int length) const;
bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency); bool addWord(unsigned short *word, int length, int frequency);
void addWordAlternatesSpellings(const uint8_t* const root, int pos, int depth, int finalFreq);
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
@ -83,13 +83,11 @@ private:
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const int excessivePos, const int transposedPos, const int freq,
const bool sameLength) const; const bool sameLength) const;
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, void onTerminal(unsigned short int* word, const int depth,
const int inputIndex, const int depth, const int snr, int *nextLetters, const uint8_t* const root, const uint8_t flags, int pos,
const int nextLettersSize, const int skipPos, const int excessivePos, const int inputIndex, const int matchWeight, const int skipPos,
const int transposedPos, const int freq); const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, int *nextLetters, const int nextLettersSize);
const int inputIndex, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c, ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,