Prune traversing a bit agressively and add a flag not to do auto completion

+1       1
-1       2
+2       0
-2       0
+3       0
-3       0
+4       6
-4       1
+5       4
-5       3
+6       3
-6      10
+7       7
-7       5

Before:
Total 42936.28 (sum of others 42814.63)

After:
Total 40860.56 (sum of others 40733.92)

Change-Id: I6a3d52f31ec181970083358280c3ebaca0a1f63e
main
satok 2011-12-14 21:38:11 +09:00
parent 5a39e527dd
commit d03317c4be
3 changed files with 13 additions and 7 deletions

View File

@ -145,7 +145,7 @@ void Correction::initCorrectionState(
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
const bool useFullEditDistance) {
const bool useFullEditDistance, const bool doAutoCompletion) {
// TODO: remove
mTransposedPos = transposedPos;
mExcessivePos = excessivePos;
@ -158,6 +158,7 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mSpaceProximityPos = spaceProximityPos;
mMissingSpacePos = missingSpacePos;
mUseFullEditDistance = useFullEditDistance;
mDoAutoCompletion = doAutoCompletion;
}
void Correction::checkState() {
@ -279,7 +280,9 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const {
// TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
// Allow one char longer word for missing character
|| (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength));
}
void Correction::addCharToCurrentWord(const int32_t c) {

View File

@ -44,7 +44,8 @@ public:
// TODO: remove
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance);
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
const bool doAutoCompletion);
void checkState();
bool initProcessState(const int index);
@ -109,6 +110,7 @@ private:
const ProximityInfo *mProximityInfo;
bool mUseFullEditDistance;
bool mDoAutoCompletion;
int mMaxEditDistance;
int mMaxDepth;
int mInputLength;

View File

@ -260,7 +260,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueue *queue) {
// TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance);
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
true /* doAutoCompletion */);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
@ -295,7 +296,7 @@ void UnigramDictionary::getMissingSpaceWords(
Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
useFullEditDistance);
useFullEditDistance, true /* doAutoCompletion */);
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
}
@ -304,7 +305,7 @@ void UnigramDictionary::getMistypedSpaceWords(
Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
useFullEditDistance);
useFullEditDistance, true /* doAutoCompletion */);
getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
}
@ -585,7 +586,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true;
} else if (stateType == Correction::UNRELATED) {
} else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
// We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining