Merge "New dict format, step 4"
commit
23eb0fa0b5
|
@ -272,6 +272,7 @@ static inline void registerNextLetter(unsigned short c, int *nextLetters, int ne
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: We need to optimize addWord by using STL or something
|
// TODO: We need to optimize addWord by using STL or something
|
||||||
|
// TODO: This needs to take an const unsigned short* and not tinker with its contents
|
||||||
bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
|
bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
|
||||||
word[length] = 0;
|
word[length] = 0;
|
||||||
if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
|
if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
|
||||||
|
@ -321,6 +322,16 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos,
|
||||||
|
int depth, int finalFreq) {
|
||||||
|
// TODO: actually add alternates when the format supports it.
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool hasAlternateSpellings(uint8_t flags) {
|
||||||
|
// TODO: when the format supports it, return the actual value.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static inline unsigned short toBaseLowerCase(unsigned short c) {
|
static inline unsigned short toBaseLowerCase(unsigned short c) {
|
||||||
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
|
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
|
||||||
c = BASE_CHARS[c];
|
c = BASE_CHARS[c];
|
||||||
|
@ -333,7 +344,7 @@ static inline unsigned short toBaseLowerCase(unsigned short c) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
|
bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
|
||||||
if (length != mInputLength) {
|
if (length != mInputLength) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -656,28 +667,6 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
|
||||||
unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
|
|
||||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
|
||||||
const int transposedPos, const int freq) {
|
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, excessivePos,
|
|
||||||
transposedPos, freq, false);
|
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
|
||||||
if (depth >= mInputLength && skipPos < 0) {
|
|
||||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
|
||||||
unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
|
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
|
|
||||||
if (sameAsTyped(word, depth + 1)) return;
|
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
|
|
||||||
excessivePos, transposedPos, freq, true);
|
|
||||||
// Proximity collection will promote a word of the same length as what user typed.
|
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth) {
|
const int inputIndex, const int skipPos, const int depth) {
|
||||||
const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
|
const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
|
||||||
|
@ -708,7 +697,6 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// In the following function, c is the current character of the dictionary word
|
// In the following function, c is the current character of the dictionary word
|
||||||
// currently examined.
|
// currently examined.
|
||||||
// currentChars is an array containing the keys close to the character the
|
// currentChars is an array containing the keys close to the character the
|
||||||
|
@ -751,6 +739,30 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId
|
||||||
return UNRELATED_CHAR;
|
return UNRELATED_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
|
||||||
|
const uint8_t* const root, const uint8_t flags, int pos,
|
||||||
|
const int inputIndex, const int matchWeight, const int skipPos,
|
||||||
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
|
||||||
|
int* nextLetters, const int nextLettersSize) {
|
||||||
|
|
||||||
|
const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
|
||||||
|
const bool hasAlternates = hasAlternateSpellings(flags);
|
||||||
|
if (isSameAsTyped && !hasAlternates) return;
|
||||||
|
|
||||||
|
if (depth >= MIN_SUGGEST_DEPTH) {
|
||||||
|
const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
|
||||||
|
excessivePos, transposedPos, freq, sameLength);
|
||||||
|
if (!isSameAsTyped)
|
||||||
|
addWord(word, depth + 1, finalFreq);
|
||||||
|
if (hasAlternates)
|
||||||
|
addWordAlternatesSpellings(DICT_ROOT, pos, flags, finalFreq);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sameLength && depth >= mInputLength && skipPos < 0) {
|
||||||
|
registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
||||||
const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
|
const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
|
||||||
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
|
@ -770,6 +782,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
int freq;
|
int freq;
|
||||||
bool isSameAsUserTypedLength = false;
|
bool isSameAsUserTypedLength = false;
|
||||||
|
|
||||||
|
const uint8_t flags = 0; // No flags for now
|
||||||
|
|
||||||
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
|
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
|
||||||
|
|
||||||
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
|
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
|
||||||
|
@ -782,9 +796,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
|
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (traverseAllNodes && terminal) {
|
if (traverseAllNodes && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
|
onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
|
||||||
matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos,
|
excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
|
||||||
freq);
|
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
*newTraverseAllNodes = traverseAllNodes;
|
*newTraverseAllNodes = traverseAllNodes;
|
||||||
|
@ -811,8 +824,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|
||||||
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
|
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
|
||||||
if (isSameAsUserTypedLength && terminal) {
|
if (isSameAsUserTypedLength && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight,
|
onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
|
||||||
skipPos, excessivePos, transposedPos, freq);
|
excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
// Start traversing all nodes after the index exceeds the user typed length
|
// Start traversing all nodes after the index exceeds the user typed length
|
||||||
|
|
|
@ -64,9 +64,9 @@ private:
|
||||||
bool checkIfDictVersionIsLatest();
|
bool checkIfDictVersionIsLatest();
|
||||||
int getAddress(int *pos);
|
int getAddress(int *pos);
|
||||||
int getFreq(int *pos);
|
int getFreq(int *pos);
|
||||||
int wideStrLen(unsigned short *str);
|
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||||
bool sameAsTyped(unsigned short *word, int length);
|
|
||||||
bool addWord(unsigned short *word, int length, int frequency);
|
bool addWord(unsigned short *word, int length, int frequency);
|
||||||
|
void addWordAlternatesSpellings(const uint8_t* const root, int pos, int depth, int finalFreq);
|
||||||
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
||||||
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
|
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
|
@ -83,13 +83,11 @@ private:
|
||||||
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||||
const int excessivePos, const int transposedPos, const int freq,
|
const int excessivePos, const int transposedPos, const int freq,
|
||||||
const bool sameLength) const;
|
const bool sameLength) const;
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminal(unsigned short int* word, const int depth,
|
||||||
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
const uint8_t* const root, const uint8_t flags, int pos,
|
||||||
const int nextLettersSize, const int skipPos, const int excessivePos,
|
const int inputIndex, const int matchWeight, const int skipPos,
|
||||||
const int transposedPos, const int freq);
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
|
||||||
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
|
int *nextLetters, const int nextLettersSize);
|
||||||
const int inputIndex, const int depth, const int snr, const int skipPos,
|
|
||||||
const int excessivePos, const int transposedPos, const int freq);
|
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
|
ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
|
||||||
|
|
Loading…
Reference in New Issue