New dict format, step 3

Some refactoring, plus the addition of a parameter that will be needed in later steps.

Bug: 4392433
Change-Id: I17f001a7efd4f69f4c35f94ee1ca8e97391b81d5
This commit is contained in:
Jean Chalard 2011-06-16 22:51:11 +09:00
parent 8124e64dcc
commit 17e44a72e8
2 changed files with 20 additions and 12 deletions

View file

@ -289,8 +289,8 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
// Find the right insertion point // Find the right insertion point
int insertAt = 0; int insertAt = 0;
while (insertAt < MAX_WORDS) { while (insertAt < MAX_WORDS) {
if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency // TODO: How should we sort words with the same frequency?
&& length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) { if (frequency > mFrequencies[insertAt]) {
break; break;
} }
insertAt++; insertAt++;
@ -371,6 +371,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
mStackInputIndex[0] = 0; mStackInputIndex[0] = 0;
mStackDiffs[0] = 0; mStackDiffs[0] = 0;
mStackSiblingPos[0] = rootPosition; mStackSiblingPos[0] = rootPosition;
mStackOutputIndex[0] = 0;
// Depth first search // Depth first search
while (depth >= 0) { while (depth >= 0) {
@ -381,14 +382,15 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
int inputIndex = mStackInputIndex[depth]; int inputIndex = mStackInputIndex[depth];
int diffs = mStackDiffs[depth]; int diffs = mStackDiffs[depth];
int siblingPos = mStackSiblingPos[depth]; int siblingPos = mStackSiblingPos[depth];
int outputIndex = mStackOutputIndex[depth];
int firstChildPos; int firstChildPos;
// depth will never be greater than maxDepth because in that case, // depth will never be greater than maxDepth because in that case,
// needsToTraverseChildrenNodes should be false // needsToTraverseChildrenNodes should be false
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos, maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount, excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
&firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs, &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
&siblingPos); &siblingPos, &outputIndex);
// Update next sibling pos // Update next sibling pos
mStackSiblingPos[depth] = siblingPos; mStackSiblingPos[depth] = siblingPos;
if (needsToTraverseChildrenNodes) { if (needsToTraverseChildrenNodes) {
@ -400,6 +402,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
mStackInputIndex[depth] = inputIndex; mStackInputIndex[depth] = inputIndex;
mStackDiffs[depth] = diffs; mStackDiffs[depth] = diffs;
mStackSiblingPos[depth] = firstChildPos; mStackSiblingPos[depth] = firstChildPos;
mStackOutputIndex[depth] = outputIndex;
} }
} else { } else {
// Goes to parent sibling node // Goes to parent sibling node
@ -582,12 +585,13 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
int newInputIndex; int newInputIndex;
int newDiffs; int newDiffs;
int newSiblingPos; int newSiblingPos;
int newOutputIndex;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
traverseAllNodes, matchWeight, inputIndex, diffs, traverseAllNodes, matchWeight, inputIndex, diffs,
skipPos, excessivePos, transposedPos, skipPos, excessivePos, transposedPos,
nextLetters, nextLettersSize, nextLetters, nextLettersSize,
&newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate, &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
&newInputIndex, &newDiffs, &newSiblingPos); &newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);
siblingPos = newSiblingPos; siblingPos = newSiblingPos;
if (needsToTraverseChildrenNodes) { if (needsToTraverseChildrenNodes) {
@ -753,7 +757,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
const int diffs, const int skipPos, const int excessivePos, const int transposedPos, const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition, int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs, bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
int *nextSiblingPosition) { int *nextSiblingPosition, int *nextOutputIndex) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
int inputCount = 0; int inputCount = 0;
if (skipPos >= 0) ++inputCount; if (skipPos >= 0) ++inputCount;
@ -771,6 +775,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos, *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
&c, &childPosition, &terminal, &freq); &c, &childPosition, &terminal, &freq);
*nextOutputIndex = depth + 1;
const bool needsToTraverseChildrenNodes = childPosition != 0; const bool needsToTraverseChildrenNodes = childPosition != 0;
@ -927,13 +932,15 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
// TODO: use uint32_t instead of unsigned short // TODO: use uint32_t instead of unsigned short
bool UnigramDictionary::isValidWord(unsigned short *word, int length) { bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
if (IS_LATEST_DICT_VERSION) { if (IS_LATEST_DICT_VERSION) {
return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); return (getFrequency(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
} else { } else {
return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); return (getFrequency(0, word, 0, length) != NOT_VALID_WORD);
} }
} }
int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
// Require strict exact match.
int UnigramDictionary::getFrequency(int pos, unsigned short *word, int offset, int length) const {
// returns address of bigram data of that word // returns address of bigram data of that word
// return -99 if not found // return -99 if not found
@ -950,7 +957,7 @@ int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset,
} }
} else { } else {
if (childPos != 0) { if (childPos != 0) {
int t = isValidWordRec(childPos, word, offset + 1, length); int t = getFrequency(childPos, word, offset + 1, length);
if (t > 0) { if (t > 0) {
return t; return t;
} }

View file

@ -59,7 +59,7 @@ private:
void getSuggestionCandidates(const int skipPos, const int excessivePos, void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos, int *nextLetters, const int nextLettersSize, const int transposedPos, int *nextLetters, const int nextLettersSize,
const int maxDepth); const int maxDepth);
int isValidWordRec(int pos, unsigned short *word, int offset, int length); int getFrequency(int pos, unsigned short *word, int offset, int length) const;
void getVersionNumber(); void getVersionNumber();
bool checkIfDictVersionIsLatest(); bool checkIfDictVersionIsLatest();
int getAddress(int *pos); int getAddress(int *pos);
@ -100,7 +100,7 @@ private:
const int diffs, const int skipPos, const int excessivePos, const int transposedPos, const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition, int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
int *nextSiblingPosition); int *nextSiblingPosition, int *nextOutputIndex);
int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word); int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
// Process a node by considering missing space // Process a node by considering missing space
bool processCurrentNodeForExactMatch(const int firstChildPos, bool processCurrentNodeForExactMatch(const int firstChildPos,
@ -145,6 +145,7 @@ private:
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL]; int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
int mStackDiffs[MAX_WORD_LENGTH_INTERNAL]; int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
int mNextLettersFrequency[NEXT_LETTERS_SIZE]; int mNextLettersFrequency[NEXT_LETTERS_SIZE];
}; };