From 2a2aac2568e3f2da3efc8aeaa392696471d63417 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Wed, 31 Jul 2013 13:47:23 +0900 Subject: [PATCH] Remove checkFirstCharacter from BigramDictionary. Bug: 10028452 Change-Id: I27b147e83b312d73e975a0b2bc8074b33906e56e --- ...oid_inputmethod_latin_BinaryDictionary.cpp | 2 +- .../core/dictionary/bigram_dictionary.cpp | 58 +++++-------------- .../core/dictionary/bigram_dictionary.h | 7 +-- .../suggest/core/dictionary/dictionary.cpp | 7 +-- .../src/suggest/core/dictionary/dictionary.h | 4 +- 5 files changed, 23 insertions(+), 55 deletions(-) diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 6e1b80ee0..8b46c2644 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j scores, spaceIndices, outputTypes); } else { count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, - inputCodePoints, inputSize, outputCodePoints, scores, outputTypes); + outputCodePoints, scores, outputTypes); } // Copy back the output values diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 09eecd348..3248d2141 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -88,21 +88,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int /* Parameters : * prevWord: the word before, the one for which we need to look up bigrams. * prevWordLength: its length. - * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions. - * inputSize: the size of the codes array. 
- * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions. - * bigramProbability: an array to output frequencies. + * outBigramCodePoints: an array for output, in the same format as outwords for getSuggestions. + * outBigramProbability: an array to output frequencies. * outputTypes: an array to output types. * This method returns the number of bigrams this word has, for backward compatibility. - * Note: this is not the number of bigrams output in the array, which is the number of - * bigrams this word has WHOSE first letter also matches the letter the user typed. - * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are - * used to match the first letter of the second word, but once the user has typed more - * and the bigrams are used to boost unigram result scores, it makes little sense to - * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints, - int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const { +int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength, + int *const outBigramCodePoints, int *const outBigramProbability, + int *const outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name @@ -127,21 +120,16 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in getCodePointsAndProbabilityAndReturnCodePointCount( mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); - - // inputSize == 0 means we are trying to find bigram predictions. 
- if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { - const int bigramProbabilityTemp = bigramsIt.getProbability(); - // Due to space constraints, the probability for bigrams is approximate - the lower the - // unigram probability, the worse the precision. The theoritical maximum error in - // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 - // in very bad cases. This means that sometimes, we'll see some bigrams interverted - // here, but it can't get too bad. - const int probability = ProbabilityUtils::computeProbabilityForBigram( - unigramProbability, bigramProbabilityTemp); - addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints, - outputTypes); - ++bigramCount; - } + // Due to space constraints, the probability for bigrams is approximate - the lower the + // unigram probability, the worse the precision. The theoretical maximum error in + // resulting probability is 8 - although in practice it's never bigger than 3 or 4 + // in very bad cases. This means that sometimes, we'll see some bigrams inverted + // here, but it can't get too bad. + const int probability = ProbabilityUtils::computeProbabilityForBigram( + unigramProbability, bigramsIt.getProbability()); + addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints, + outputTypes); + ++bigramCount; } return min(bigramCount, MAX_RESULTS); } @@ -158,22 +146,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in mBinaryDictionaryInfo, pos); } -bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { - // Checks whether this word starts with same character or neighboring characters of - // what user typed. 
- - int maxAlt = MAX_ALTERNATIVES; - const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word); - while (maxAlt > 0) { - if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { - return true; - } - inputCodePoints++; - maxAlt--; - } - return false; -} - bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 7706a2c22..438c34cac 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -27,8 +27,8 @@ class BigramDictionary { public: BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); - int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize, - int *outWords, int *frequencies, int *outputTypes) const; + int getPredictions(const int *word, int length, int *outBigramCodePoints, + int *outBigramProbability, int *outputTypes) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); @@ -37,13 +37,10 @@ class BigramDictionary { void addWordBigram(int *word, int length, int probability, int *bigramProbability, int *bigramCodePoints, int *outputTypes) const; - bool checkFirstCharacter(int *word, int *inputCodePoints) const; int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - // TODO: Re-implement proximity correction for bigram correction - static const int MAX_ALTERNATIVES = 1; }; } // namespace latinime #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp 
b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 52e635975..f597f9943 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -77,11 +77,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } } -int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, - int *outWords, int *frequencies, int *outputTypes) const { +int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, + int *outputTypes) const { if (length <= 0) return 0; - return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords, - frequencies, outputTypes); + return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); } int Dictionary::getProbability(const int *word, int length) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 1bf24a85b..9f1e0729d 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -62,8 +62,8 @@ class Dictionary { const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes) const; - int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, - int *frequencies, int *outputTypes) const; + int getBigrams(const int *word, int length, int *outWords, int *frequencies, + int *outputTypes) const; int getProbability(const int *word, int length) const;