From 67e3cc8488aeae6fbeec139a07c08491b0fc40e8 Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Thu, 31 May 2012 15:04:58 +0900 Subject: [PATCH] Enhance the safety net in multiple word suggestions Bug: 6576793 Change-Id: I97100b482d3cf4e1b417c197b5d1e971a72b998d --- native/jni/src/defines.h | 5 +-- native/jni/src/unigram_dictionary.cpp | 45 +++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index b61ebd23b..cd2fc634a 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -234,11 +234,12 @@ static inline void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 -#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10 +// TODO: Extend this limitation +#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 5 // TODO: Remove this limitation #define MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH 12 // TODO: Remove this limitation -#define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 110 +#define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 45 #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 690d8dc6f..e523b2f2c 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -430,6 +430,48 @@ int UnigramDictionary::getSubStringSuggestion( const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) { + if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { + return FLAG_MULTIPLE_SUGGEST_ABORT; + } + + ///////////////////////////////////////////// + // safety net for multiple word suggestion // + // TODO: Remove this safety net // + ///////////////////////////////////////////// + int 
smallWordCount = 0; + int singleLetterWordCount = 0; + if (inputWordLength == 1) { + ++singleLetterWordCount; + } + if (inputWordLength <= 2) { + // small word == single letter or 2-letter word + ++smallWordCount; + } + for (int i = 0; i < currentWordIndex; ++i) { + const int length = wordLengthArray[i]; + if (length == 1) { + ++singleLetterWordCount; + // Safety net to avoid suggesting sequential single letter words + if (i < (currentWordIndex - 1)) { + if (wordLengthArray[i + 1] == 1) { + return FLAG_MULTIPLE_SUGGEST_ABORT; + } + } else if (inputWordLength == 1) { + return FLAG_MULTIPLE_SUGGEST_ABORT; + } + } + if (length <= 2) { + ++smallWordCount; + } + // Safety net to avoid suggesting multiple words with too many small words (for now: 3 or more single-letter words, or 4 or more words of 2 letters or fewer) + if (singleLetterWordCount >= 3 || smallWordCount >= 4) { + return FLAG_MULTIPLE_SUGGEST_ABORT; + } + } + ////////////////////////////////////////////// + // TODO: Remove the safety net above // + ////////////////////////////////////////////// + unsigned short* tempOutputWord = 0; int nextWordLength = 0; // TODO: Optimize init suggestion @@ -555,9 +597,6 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, // Current word int inputWordStartPos = startInputPos; int inputWordLength = i - startInputPos; - if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { - break; - } const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength,