From d03317c4be21ee65c19d00c7b83a7042042b8627 Mon Sep 17 00:00:00 2001 From: satok Date: Wed, 14 Dec 2011 21:38:11 +0900 Subject: [PATCH] Prune traversing a bit aggressively and add a flag not to do auto completion +1 1 -1 2 +2 0 -2 0 +3 0 -3 0 +4 6 -4 1 +5 4 -5 3 +6 3 -6 10 +7 7 -7 5 Before: Total 42936.28 (sum of others 42814.63) After: Total 40860.56 (sum of others 40733.92) Change-Id: I6a3d52f31ec181970083358280c3ebaca0a1f63e --- native/src/correction.cpp | 7 +++++-- native/src/correction.h | 4 +++- native/src/unigram_dictionary.cpp | 9 +++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 22ee75a24..364913f38 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -145,7 +145,7 @@ void Correction::initCorrectionState( void Correction::setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos, - const bool useFullEditDistance) { + const bool useFullEditDistance, const bool doAutoCompletion) { // TODO: remove mTransposedPos = transposedPos; mExcessivePos = excessivePos; @@ -158,6 +158,7 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos, mSpaceProximityPos = spaceProximityPos; mMissingSpacePos = missingSpacePos; mUseFullEditDistance = useFullEditDistance; + mDoAutoCompletion = doAutoCompletion; } void Correction::checkState() { @@ -279,7 +280,9 @@ void Correction::startToTraverseAllNodes() { bool Correction::needsToPrune() const { // TODO: use edit distance here - return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance; + return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance + // Allow one char longer word for missing character + || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength)); } void Correction::addCharToCurrentWord(const int32_t c) { diff --git a/native/src/correction.h b/native/src/correction.h 
index d4e99f0ce..4a8d1fab7 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -44,7 +44,8 @@ public: // TODO: remove void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance); + const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, + const bool doAutoCompletion); void checkState(); bool initProcessState(const int index); @@ -109,6 +110,7 @@ private: const ProximityInfo *mProximityInfo; bool mUseFullEditDistance; + bool mDoAutoCompletion; int mMaxEditDistance; int mMaxDepth; int mInputLength; diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index a2c1f72a1..7cf191970 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -260,7 +260,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, - -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance); + -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, + true /* doAutoCompletion */); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); @@ -295,7 +296,7 @@ void UnigramDictionary::getMissingSpaceWords( Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, - useFullEditDistance); + useFullEditDistance, true /* doAutoCompletion */); getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); } @@ -304,7 +305,7 @@ void UnigramDictionary::getMistypedSpaceWords( Correction 
*correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, - useFullEditDistance); + useFullEditDistance, true /* doAutoCompletion */); getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); } @@ -585,7 +586,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL || stateType == Correction::ON_TERMINAL) { needsToInvokeOnTerminal = true; - } else if (stateType == Correction::UNRELATED) { + } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) { // We found that this is an unrelated character, so we should give up traversing // this node and its children entirely. // However we may not be on the last virtual node yet so we skip the remaining