Prune traversal a bit more aggressively and add a flag to disable auto completion

+1       1
-1       2
+2       0
-2       0
+3       0
-3       0
+4       6
-4       1
+5       4
-5       3
+6       3
-6      10
+7       7
-7       5

Before:
Total 42936.28 (sum of others 42814.63)

After:
Total 40860.56 (sum of others 40733.92)

Change-Id: I6a3d52f31ec181970083358280c3ebaca0a1f63e
This commit is contained in:
satok 2011-12-14 21:38:11 +09:00
parent 5a39e527dd
commit d03317c4be
3 changed files with 13 additions and 7 deletions

View file

@ -145,7 +145,7 @@ void Correction::initCorrectionState(
void Correction::setCorrectionParams(const int skipPos, const int excessivePos, void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
const bool useFullEditDistance) { const bool useFullEditDistance, const bool doAutoCompletion) {
// TODO: remove // TODO: remove
mTransposedPos = transposedPos; mTransposedPos = transposedPos;
mExcessivePos = excessivePos; mExcessivePos = excessivePos;
@ -158,6 +158,7 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mSpaceProximityPos = spaceProximityPos; mSpaceProximityPos = spaceProximityPos;
mMissingSpacePos = missingSpacePos; mMissingSpacePos = missingSpacePos;
mUseFullEditDistance = useFullEditDistance; mUseFullEditDistance = useFullEditDistance;
mDoAutoCompletion = doAutoCompletion;
} }
void Correction::checkState() { void Correction::checkState() {
@ -279,7 +280,9 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const { bool Correction::needsToPrune() const {
// TODO: use edit distance here // TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance; return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
// Allow one char longer word for missing character
|| (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength));
} }
void Correction::addCharToCurrentWord(const int32_t c) { void Correction::addCharToCurrentWord(const int32_t c) {

View file

@ -44,7 +44,8 @@ public:
// TODO: remove // TODO: remove
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance); const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
const bool doAutoCompletion);
void checkState(); void checkState();
bool initProcessState(const int index); bool initProcessState(const int index);
@ -109,6 +110,7 @@ private:
const ProximityInfo *mProximityInfo; const ProximityInfo *mProximityInfo;
bool mUseFullEditDistance; bool mUseFullEditDistance;
bool mDoAutoCompletion;
int mMaxEditDistance; int mMaxEditDistance;
int mMaxDepth; int mMaxDepth;
int mInputLength; int mInputLength;

View file

@ -260,7 +260,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueue *queue) { const int inputLength, Correction *correction, WordsPriorityQueue *queue) {
// TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance); -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
true /* doAutoCompletion */);
int rootPosition = ROOT_POS; int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position // Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
@ -295,7 +296,7 @@ void UnigramDictionary::getMissingSpaceWords(
Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
useFullEditDistance); useFullEditDistance, true /* doAutoCompletion */);
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
} }
@ -304,7 +305,7 @@ void UnigramDictionary::getMistypedSpaceWords(
Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
useFullEditDistance); useFullEditDistance, true /* doAutoCompletion */);
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
} }
@ -585,7 +586,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == Correction::ON_TERMINAL) { || stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true; needsToInvokeOnTerminal = true;
} else if (stateType == Correction::UNRELATED) { } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
// We found that this is an unrelated character, so we should give up traversing // We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely. // this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining // However we may not be on the last virtual node yet so we skip the remaining