From 20b6775acc957896bdb038dfd99794d6cd7cea5a Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Thu, 24 Jan 2013 17:08:30 +0900 Subject: [PATCH] Refactor most probable string Change-Id: I96597decf5e36d9ce088c34427915f2379255054 --- native/jni/src/proximity_info_state.cpp | 59 +++++-------------- native/jni/src/proximity_info_state.h | 41 +++++++------ native/jni/src/proximity_info_state_utils.cpp | 52 +++++++++++++++- native/jni/src/proximity_info_state_utils.h | 11 +++- 4 files changed, 97 insertions(+), 66 deletions(-) diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index f78b84e88..387b03a24 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include // for memset() +#include // for memset() and memcpy() #include // for debug prints #define LOG_TAG "LatinIME: proximity_info_state.cpp" @@ -59,12 +59,15 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi int pushTouchPointStartIndex = 0; int lastSavedInputSize = 0; mMaxPointToKeyLength = maxPointToKeyLength; + mSampledInputSize = 0; + mMostProbableStringProbability = 0.0f; + if (mIsContinuationPossible && mSampledInputIndice.size() > 1) { // Just update difference. - // Two points prior is never skipped. Thus, we pop 2 input point data here. - pushTouchPointStartIndex = mSampledInputIndice[mSampledInputIndice.size() - 2]; - popInputData(); - popInputData(); + // Previous two points are never skipped. Thus, we pop 2 input point data here. + pushTouchPointStartIndex = ProximityInfoStateUtils::trimLastTwoTouchPoints( + &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledLengthCache, + &mSampledInputIndice); lastSavedInputSize = mSampledInputXs.size(); } else { // Clear all data. @@ -81,11 +84,11 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi mCharProbabilities.clear(); mDirections.clear(); } + if (DEBUG_GEO_FULL) { AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d", pushTouchPointStartIndex, lastSavedInputSize); } - mSampledInputSize = 0; if (xCoordinates && yCoordinates) { mSampledInputSize = ProximityInfoStateUtils::updateTouchPoints( @@ -121,6 +124,9 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi ProximityInfoStateUtils::updateSampledSearchKeysVector(mProximityInfo, mSampledInputSize, lastSavedInputSize, &mSampledLengthCache, &mSampledNearKeysVector, &mSampledSearchKeysVector); + mMostProbableStringProbability = ProximityInfoStateUtils::getMostProbableString( + mProximityInfo, mSampledInputSize, &mCharProbabilities, mMostProbableString); + } } @@ -132,8 +138,6 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi // end /////////////////////// - memset(mNormalizedSquaredDistances, NOT_A_DISTANCE, sizeof(mNormalizedSquaredDistances)); - memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); mTouchPositionCorrectionEnabled = mSampledInputSize > 0 && mHasTouchPositionCorrectionData && xCoordinates && yCoordinates; if (!isGeometric && pointerId == 0) { @@ -142,8 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi if (mTouchPositionCorrectionEnabled) { ProximityInfoStateUtils::initNormalizedSquaredDistances( mProximityInfo, inputSize, xCoordinates, yCoordinates, mInputProximities, - hasInputCoordinates(), &mSampledInputXs, &mSampledInputYs, - mNormalizedSquaredDistances); + &mSampledInputXs, &mSampledInputYs, mNormalizedSquaredDistances); } } if (DEBUG_GEO_FULL) { @@ -278,16 +281,10 @@ int ProximityInfoState::getAllPossibleChars( } bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const { - ASSERT(keyId >= 0); - ASSERT(index >= 0 && index < mSampledInputSize); + ASSERT(keyId >= 0 && index >= 0 && index < mSampledInputSize); return mSampledSearchKeysVector[index].test(keyId); } -void ProximityInfoState::popInputData() { - ProximityInfoStateUtils::popInputData(&mSampledInputXs, &mSampledInputYs, &mSampledTimes, - &mSampledLengthCache, &mSampledInputIndice); -} - float ProximityInfoState::getDirection(const int index0, const int index1) const { return ProximityInfoStateUtils::getDirection( &mSampledInputXs, &mSampledInputYs, index0, index1); @@ -313,33 +310,9 @@ float ProximityInfoState::getLineToKeyDistance( keyX, keyY, x0, y0, x1, y1, extend); } -// Get a word that is detected by tracing the most probable string into codePointBuf and -// returns probability of generating the word. float ProximityInfoState::getMostProbableString(int *const codePointBuf) const { - static const float DEMOTION_LOG_PROBABILITY = 0.3f; - int index = 0; - float sumLogProbability = 0.0f; - // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases. - for (int i = 0; i < mSampledInputSize && index < MAX_WORD_LENGTH - 1; ++i) { - float minLogProbability = static_cast(MAX_POINT_TO_KEY_LENGTH); - int character = NOT_AN_INDEX; - for (hash_map_compat::const_iterator it = mCharProbabilities[i].begin(); - it != mCharProbabilities[i].end(); ++it) { - const float logProbability = (it->first != NOT_AN_INDEX) - ? it->second + DEMOTION_LOG_PROBABILITY : it->second; - if (logProbability < minLogProbability) { - minLogProbability = logProbability; - character = it->first; - } - } - if (character != NOT_AN_INDEX) { - codePointBuf[index] = mProximityInfo->getCodePointOf(character); - index++; - } - sumLogProbability += minLogProbability; - } - codePointBuf[index] = '\0'; - return sumLogProbability; + memcpy(codePointBuf, mMostProbableString, sizeof(mMostProbableString)); + return mMostProbableStringProbability; } bool ProximityInfoState::hasSpaceProximity(const int index) const { diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 9c4f557e2..7422cb08f 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -54,10 +54,12 @@ class ProximityInfoState { mSampledInputIndice(), mSampledLengthCache(), mBeelineSpeedPercentiles(), mSampledDistanceCache_G(), mSpeedRates(), mDirections(), mCharProbabilities(), mSampledNearKeysVector(), mSampledSearchKeysVector(), - mTouchPositionCorrectionEnabled(false), mSampledInputSize(0) { + mTouchPositionCorrectionEnabled(false), mSampledInputSize(0), + mMostProbableStringProbability(0.0f) { memset(mInputProximities, 0, sizeof(mInputProximities)); memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances)); memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); + memset(mMostProbableString, 0, sizeof(mMostProbableString)); } // Non virtual inline destructor -- never inherit this class @@ -67,6 +69,21 @@ class ProximityInfoState { return getProximityCodePointsAt(index)[0]; } + inline bool sameAsTyped(const int *word, int length) const { + if (length != mSampledInputSize) { + return false; + } + const int *inputProximities = mInputProximities; + while (length--) { + if (*inputProximities != *word) { + return false; + } + inputProximities += MAX_PROXIMITY_CHARS_SIZE; + word++; + } + return true; + } + AK_FORCE_INLINE bool existsCodePointInProximityAt(const int index, const int c) const { const int *codePoints = getProximityCodePointsAt(index); int i = 0; @@ -107,21 +124,6 @@ class ProximityInfoState { return mTouchPositionCorrectionEnabled; } - inline bool sameAsTyped(const int *word, int length) const { - if (length != mSampledInputSize) { - return false; - } - const int *inputProximities = mInputProximities; - while (length--) { - if (*inputProximities != *word) { - return false; - } - inputProximities += MAX_PROXIMITY_CHARS_SIZE; - word++; - } - return true; - } - bool isUsed() const { return mSampledInputSize > 0; } @@ -208,14 +210,9 @@ class ProximityInfoState { // Defined here // ///////////////////////////////////////// - bool hasInputCoordinates() const { - return mSampledInputXs.size() > 0 && mSampledInputYs.size() > 0; - } - inline const int *getProximityCodePointsAt(const int index) const { return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index); } - void popInputData(); // const const ProximityInfo *mProximityInfo; @@ -255,6 +252,8 @@ class ProximityInfoState { int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH]; int mSampledInputSize; int mPrimaryInputWord[MAX_WORD_LENGTH]; + float mMostProbableStringProbability; + int mMostProbableString[MAX_WORD_LENGTH]; }; } // namespace latinime #endif // LATINIME_PROXIMITY_INFO_STATE_H diff --git a/native/jni/src/proximity_info_state_utils.cpp b/native/jni/src/proximity_info_state_utils.cpp index dd8f1bc79..9f85743e5 100644 --- a/native/jni/src/proximity_info_state_utils.cpp +++ b/native/jni/src/proximity_info_state_utils.cpp @@ -15,6 +15,7 @@ */ #include +#include // for memset() #include // for debug prints #include @@ -26,6 +27,17 @@ namespace latinime { +/* static */ int ProximityInfoStateUtils::trimLastTwoTouchPoints(std::vector *sampledInputXs, + std::vector *sampledInputYs, std::vector *sampledInputTimes, + std::vector *sampledLengthCache, std::vector *sampledInputIndice) { + const int nextStartIndex = (*sampledInputIndice)[sampledInputIndice->size() - 2]; + popInputData(sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache, + sampledInputIndice); + popInputData(sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache, + sampledInputIndice); + return nextStartIndex; +} + /* static */ int ProximityInfoStateUtils::updateTouchPoints(const int mostCommonKeyWidth, const ProximityInfo *const proximityInfo, const int maxPointToKeyLength, const int *const inputProximities, const int *const inputXCoordinates, @@ -133,6 +145,7 @@ namespace latinime { /* static */ void ProximityInfoStateUtils::initPrimaryInputWord( const int inputSize, const int *const inputProximities, int *primaryInputWord) { + memset(primaryInputWord, 0, sizeof(primaryInputWord[0]) * MAX_WORD_LENGTH); for (int i = 0; i < inputSize; ++i) { primaryInputWord[i] = getPrimaryCodePointAt(inputProximities, i); } @@ -171,10 +184,13 @@ namespace latinime { /* static */ void ProximityInfoStateUtils::initNormalizedSquaredDistances( const ProximityInfo *const proximityInfo, const int inputSize, const int *inputXCoordinates, const int *inputYCoordinates, - const int *const inputProximities, const bool hasInputCoordinates, + const int *const inputProximities, const std::vector *const sampledInputXs, const std::vector *const sampledInputYs, int *normalizedSquaredDistances) { + memset(normalizedSquaredDistances, NOT_A_DISTANCE, + sizeof(normalizedSquaredDistances[0]) * MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH); + const bool hasInputCoordinates = sampledInputXs->size() > 0 && sampledInputYs->size() > 0; for (int i = 0; i < inputSize; ++i) { const int *proximityCodePoints = getProximityCodePointsAt(inputProximities, i); const int primaryKey = proximityCodePoints[0]; @@ -1011,6 +1027,40 @@ namespace latinime { return true; } +// Get a word that is detected by tracing the most probable string into codePointBuf and +// returns probability of generating the word. +/* static */ float ProximityInfoStateUtils::getMostProbableString( + const ProximityInfo *const proximityInfo, const int sampledInputSize, + const std::vector > *const charProbabilities, + int *const codePointBuf) { + ASSERT(charProbabilities->size() >= 0 && sampledInputSize >= 0); + memset(codePointBuf, 0, sizeof(codePointBuf[0]) * MAX_WORD_LENGTH); + static const float DEMOTION_LOG_PROBABILITY = 0.3f; + int index = 0; + float sumLogProbability = 0.0f; + // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases. + for (int i = 0; i < sampledInputSize && index < MAX_WORD_LENGTH - 1; ++i) { + float minLogProbability = static_cast(MAX_POINT_TO_KEY_LENGTH); + int character = NOT_AN_INDEX; + for (hash_map_compat::const_iterator it = (*charProbabilities)[i].begin(); + it != (*charProbabilities)[i].end(); ++it) { + const float logProbability = (it->first != NOT_AN_INDEX) + ? it->second + DEMOTION_LOG_PROBABILITY : it->second; + if (logProbability < minLogProbability) { + minLogProbability = logProbability; + character = it->first; + } + } + if (character != NOT_AN_INDEX) { + codePointBuf[index] = proximityInfo->getCodePointOf(character); + index++; + } + sumLogProbability += minLogProbability; + } + codePointBuf[index] = '\0'; + return sumLogProbability; +} + /* static */ void ProximityInfoStateUtils::dump(const bool isGeometric, const int inputSize, const int *const inputXCoordinates, const int *const inputYCoordinates, const int sampledInputSize, const std::vector *const sampledInputXs, diff --git a/native/jni/src/proximity_info_state_utils.h b/native/jni/src/proximity_info_state_utils.h index 931549016..c8f0aeb75 100644 --- a/native/jni/src/proximity_info_state_utils.h +++ b/native/jni/src/proximity_info_state_utils.h @@ -32,6 +32,9 @@ class ProximityInfoStateUtils { typedef hash_map_compat NearKeysDistanceMap; typedef std::bitset NearKeycodesSet; + static int trimLastTwoTouchPoints(std::vector *sampledInputXs, + std::vector *sampledInputYs, std::vector *sampledInputTimes, + std::vector *sampledLengthCache, std::vector *sampledInputIndice); static int updateTouchPoints(const int mostCommonKeyWidth, const ProximityInfo *const proximityInfo, const int maxPointToKeyLength, const int *const inputProximities, @@ -96,7 +99,7 @@ class ProximityInfoStateUtils { static void initNormalizedSquaredDistances( const ProximityInfo *const proximityInfo, const int inputSize, const int *inputXCoordinates, const int *inputYCoordinates, - const int *const inputProximities, const bool hasInputCoordinates, + const int *const inputProximities, const std::vector *const sampledInputXs, const std::vector *const sampledInputYs, int *normalizedSquaredDistances); @@ -113,6 +116,12 @@ class ProximityInfoStateUtils { const std::vector *const sampledInputYs, const std::vector *const sampledTimes, const std::vector *const sampledInputIndices); + // TODO: Move to most_probable_string_utils.h + static float getMostProbableString( + const ProximityInfo *const proximityInfo, const int sampledInputSize, + const std::vector > *const charProbabilities, + int *const codePointBuf); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoStateUtils);