New dict format, step 3
Some refactoring, plus the addition of a parameter that will be needed in a later step. Bug: 4392433 Change-Id: I17f001a7efd4f69f4c35f94ee1ca8e97391b81d5
This commit is contained in:
parent
8124e64dcc
commit
17e44a72e8
2 changed files with 20 additions and 12 deletions
|
@ -289,8 +289,8 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
||||||
// Find the right insertion point
|
// Find the right insertion point
|
||||||
int insertAt = 0;
|
int insertAt = 0;
|
||||||
while (insertAt < MAX_WORDS) {
|
while (insertAt < MAX_WORDS) {
|
||||||
if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency
|
// TODO: How should we sort words with the same frequency?
|
||||||
&& length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
|
if (frequency > mFrequencies[insertAt]) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
insertAt++;
|
insertAt++;
|
||||||
|
@ -371,6 +371,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
mStackInputIndex[0] = 0;
|
mStackInputIndex[0] = 0;
|
||||||
mStackDiffs[0] = 0;
|
mStackDiffs[0] = 0;
|
||||||
mStackSiblingPos[0] = rootPosition;
|
mStackSiblingPos[0] = rootPosition;
|
||||||
|
mStackOutputIndex[0] = 0;
|
||||||
|
|
||||||
// Depth first search
|
// Depth first search
|
||||||
while (depth >= 0) {
|
while (depth >= 0) {
|
||||||
|
@ -381,14 +382,15 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
int inputIndex = mStackInputIndex[depth];
|
int inputIndex = mStackInputIndex[depth];
|
||||||
int diffs = mStackDiffs[depth];
|
int diffs = mStackDiffs[depth];
|
||||||
int siblingPos = mStackSiblingPos[depth];
|
int siblingPos = mStackSiblingPos[depth];
|
||||||
|
int outputIndex = mStackOutputIndex[depth];
|
||||||
int firstChildPos;
|
int firstChildPos;
|
||||||
// depth will never be greater than maxDepth because in that case,
|
// depth will never be greater than maxDepth because in that case,
|
||||||
// needsToTraverseChildrenNodes should be false
|
// needsToTraverseChildrenNodes should be false
|
||||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
|
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
|
||||||
maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
|
maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
|
||||||
excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
|
excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
|
||||||
&firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
|
&firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
|
||||||
&siblingPos);
|
&siblingPos, &outputIndex);
|
||||||
// Update next sibling pos
|
// Update next sibling pos
|
||||||
mStackSiblingPos[depth] = siblingPos;
|
mStackSiblingPos[depth] = siblingPos;
|
||||||
if (needsToTraverseChildrenNodes) {
|
if (needsToTraverseChildrenNodes) {
|
||||||
|
@ -400,6 +402,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
mStackInputIndex[depth] = inputIndex;
|
mStackInputIndex[depth] = inputIndex;
|
||||||
mStackDiffs[depth] = diffs;
|
mStackDiffs[depth] = diffs;
|
||||||
mStackSiblingPos[depth] = firstChildPos;
|
mStackSiblingPos[depth] = firstChildPos;
|
||||||
|
mStackOutputIndex[depth] = outputIndex;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Goes to parent sibling node
|
// Goes to parent sibling node
|
||||||
|
@ -582,12 +585,13 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
||||||
int newInputIndex;
|
int newInputIndex;
|
||||||
int newDiffs;
|
int newDiffs;
|
||||||
int newSiblingPos;
|
int newSiblingPos;
|
||||||
|
int newOutputIndex;
|
||||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
|
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
|
||||||
traverseAllNodes, matchWeight, inputIndex, diffs,
|
traverseAllNodes, matchWeight, inputIndex, diffs,
|
||||||
skipPos, excessivePos, transposedPos,
|
skipPos, excessivePos, transposedPos,
|
||||||
nextLetters, nextLettersSize,
|
nextLetters, nextLettersSize,
|
||||||
&newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
|
&newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
|
||||||
&newInputIndex, &newDiffs, &newSiblingPos);
|
&newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);
|
||||||
siblingPos = newSiblingPos;
|
siblingPos = newSiblingPos;
|
||||||
|
|
||||||
if (needsToTraverseChildrenNodes) {
|
if (needsToTraverseChildrenNodes) {
|
||||||
|
@ -753,7 +757,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||||
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
|
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
|
||||||
int *nextSiblingPosition) {
|
int *nextSiblingPosition, int *nextOutputIndex) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
int inputCount = 0;
|
int inputCount = 0;
|
||||||
if (skipPos >= 0) ++inputCount;
|
if (skipPos >= 0) ++inputCount;
|
||||||
|
@ -771,6 +775,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
|
|
||||||
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
|
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
|
||||||
&c, &childPosition, &terminal, &freq);
|
&c, &childPosition, &terminal, &freq);
|
||||||
|
*nextOutputIndex = depth + 1;
|
||||||
|
|
||||||
const bool needsToTraverseChildrenNodes = childPosition != 0;
|
const bool needsToTraverseChildrenNodes = childPosition != 0;
|
||||||
|
|
||||||
|
@ -927,13 +932,15 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
|
||||||
// TODO: use uint32_t instead of unsigned short
|
// TODO: use uint32_t instead of unsigned short
|
||||||
bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
|
bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
|
||||||
if (IS_LATEST_DICT_VERSION) {
|
if (IS_LATEST_DICT_VERSION) {
|
||||||
return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
|
return (getFrequency(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
|
||||||
} else {
|
} else {
|
||||||
return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
|
return (getFrequency(0, word, 0, length) != NOT_VALID_WORD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
|
|
||||||
|
// Require strict exact match.
|
||||||
|
int UnigramDictionary::getFrequency(int pos, unsigned short *word, int offset, int length) const {
|
||||||
// returns address of bigram data of that word
|
// returns address of bigram data of that word
|
||||||
// return -99 if not found
|
// return -99 if not found
|
||||||
|
|
||||||
|
@ -950,7 +957,7 @@ int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (childPos != 0) {
|
if (childPos != 0) {
|
||||||
int t = isValidWordRec(childPos, word, offset + 1, length);
|
int t = getFrequency(childPos, word, offset + 1, length);
|
||||||
if (t > 0) {
|
if (t > 0) {
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,7 +59,7 @@ private:
|
||||||
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
||||||
const int maxDepth);
|
const int maxDepth);
|
||||||
int isValidWordRec(int pos, unsigned short *word, int offset, int length);
|
int getFrequency(int pos, unsigned short *word, int offset, int length) const;
|
||||||
void getVersionNumber();
|
void getVersionNumber();
|
||||||
bool checkIfDictVersionIsLatest();
|
bool checkIfDictVersionIsLatest();
|
||||||
int getAddress(int *pos);
|
int getAddress(int *pos);
|
||||||
|
@ -100,7 +100,7 @@ private:
|
||||||
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||||
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
||||||
int *nextSiblingPosition);
|
int *nextSiblingPosition, int *nextOutputIndex);
|
||||||
int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
|
int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
|
||||||
// Process a node by considering missing space
|
// Process a node by considering missing space
|
||||||
bool processCurrentNodeForExactMatch(const int firstChildPos,
|
bool processCurrentNodeForExactMatch(const int firstChildPos,
|
||||||
|
@ -145,6 +145,7 @@ private:
|
||||||
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
|
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
|
int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
|
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mNextLettersFrequency[NEXT_LETTERS_SIZE];
|
int mNextLettersFrequency[NEXT_LETTERS_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue