From 527c128309da708d0fdaf7928da833320d1754e9 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Tue, 11 Jun 2013 17:04:40 +0900 Subject: [PATCH] Remove unused methods. Bug: 8550444 Change-Id: Ie627084143846407ca36df82c2a7f904a0e6f49c --- native/jni/Android.mk | 6 +- ...oid_inputmethod_latin_BinaryDictionary.cpp | 7 +- native/jni/src/obsolete/correction.cpp | 1006 ----------------- native/jni/src/obsolete/correction.h | 398 ------- native/jni/src/obsolete/correction_state.h | 83 -- .../core/dictionary/bigram_dictionary.cpp | 25 - .../core/dictionary/bigram_dictionary.h | 6 +- .../suggest/core/dictionary/binary_format.h | 1 - .../core/dictionary/probability_utils.h | 19 - .../src/suggest/core/layout/proximity_info.h | 5 - .../core/layout/proximity_info_state.cpp | 25 - .../core/layout/proximity_info_state.h | 35 +- .../layout/proximity_info_state_utils.cpp | 42 - .../layout/touch_position_correction_utils.h | 25 - .../suggest/policyimpl/utils/edit_distance.h | 20 + .../utils/autocorrection_threshold_utils.cpp | 105 ++ .../utils/autocorrection_threshold_utils.h | 39 + 17 files changed, 185 insertions(+), 1662 deletions(-) delete mode 100644 native/jni/src/obsolete/correction.cpp delete mode 100644 native/jni/src/obsolete/correction.h delete mode 100644 native/jni/src/obsolete/correction_state.h create mode 100644 native/jni/src/utils/autocorrection_threshold_utils.cpp create mode 100644 native/jni/src/utils/autocorrection_threshold_utils.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 9718cf5fb..b70568443 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -47,8 +47,6 @@ LATIN_IME_JNI_SRC_FILES := \ LATIN_IME_CORE_SRC_FILES := \ suggest/core/suggest.cpp \ - $(addprefix obsolete/, \ - correction.cpp) \ $(addprefix suggest/core/dicnode/, \ dic_node.cpp \ dic_node_utils.cpp \ @@ -76,7 +74,9 @@ LATIN_IME_CORE_SRC_FILES := \ typing_suggest_policy.cpp \ typing_traversal.cpp \ typing_weighting.cpp) \ - utils/char_utils.cpp + $(addprefix 
utils/, \ + char_utils.cpp \ + autocorrection_threshold_utils.cpp) LOCAL_SRC_FILES := \ $(LATIN_IME_JNI_SRC_FILES) \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 8490e32bc..1225e7f7a 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -22,15 +22,16 @@ #include // for memset() #include #include +#include #include "defines.h" #include "jni.h" #include "jni_common.h" -#include "obsolete/correction.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" +#include "utils/autocorrection_threshold_utils.h" namespace latinime { @@ -201,7 +202,7 @@ static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass int afterCodePoints[afterLength]; env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); - return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength, + return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength, afterCodePoints, afterLength, score); } @@ -213,7 +214,7 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji int afterCodePoints[afterLength]; env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); - return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength, + return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength, afterCodePoints, afterLength); } diff --git a/native/jni/src/obsolete/correction.cpp b/native/jni/src/obsolete/correction.cpp deleted file mode 100644 index 6b80ed8ea..000000000 --- 
a/native/jni/src/obsolete/correction.cpp +++ /dev/null @@ -1,1006 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define LOG_TAG "LatinIME: correction.cpp" - -#include - -#include "defines.h" -#include "obsolete/correction.h" -#include "suggest/core/layout/proximity_info_state.h" -#include "suggest/core/layout/touch_position_correction_utils.h" -#include "suggest/policyimpl/utils/edit_distance.h" -#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h" -#include "utils/char_utils.h" - -namespace latinime { - -class ProximityInfo; - -// private static const member variables -// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. 
-const int Correction::WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE = 80; -const int Correction::WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X = 12; -const int Correction::WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE = 58; -const int Correction::WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE = 50; -const int Correction::WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE = 75; -const int Correction::WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE = 75; -const int Correction::WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE = 70; -const int Correction::FULL_MATCHED_WORDS_PROMOTION_RATE = 120; -const int Correction::WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE = 90; -const int Correction::WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE = 70; -const int Correction::WORDS_WITH_MATCH_SKIP_PROMOTION_RATE = 105; -const int Correction::WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE = 148; -const int Correction::WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER = 3; -const int Correction::CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE = 45; -const int Correction::INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE = 70; -const int Correction::FIRST_CHAR_DIFFERENT_DEMOTION_RATE = 96; -const int Correction::TWO_WORDS_CAPITALIZED_DEMOTION_RATE = 50; -const int Correction::TWO_WORDS_CORRECTION_DEMOTION_BASE = 80; - -///////////////////////////// -// edit distance funcitons // -///////////////////////////// - -inline static void initEditDistance(int *editDistanceTable) { - for (int i = 0; i <= MAX_WORD_LENGTH; ++i) { - editDistanceTable[i] = i; - } -} - -inline static void dumpEditDistance10ForDebug(int *editDistanceTable, - const int editDistanceTableWidth, const int outputLength) { - if (DEBUG_DICT) { - AKLOGI("EditDistanceTable"); - for (int i = 0; i <= 10; ++i) { - int c[11]; - for (int j = 0; j <= 10; ++j) { - if (j < editDistanceTableWidth + 1 && i < outputLength + 1) { - c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j]; - } else { - c[j] = -1; - } - } - AKLOGI("[ 
%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", - c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); - (void)c; // To suppress compiler warning - } - } -} - -inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, - const int outputLength, const int inputSize) { - if (DEBUG_EDIT_DISTANCE) { - AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength); - } - return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize]; -} - -//////////////// -// Correction // -//////////////// - -void Correction::resetCorrection() { - mTotalTraverseCount = 0; -} - -void Correction::initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth) { - mProximityInfo = pi; - mInputSize = inputSize; - mMaxDepth = maxDepth; - mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2; - // TODO: This is not supposed to be required. Check what's going wrong with - // editDistance[0 ~ MAX_WORD_LENGTH] - initEditDistance(mEditDistanceTable); -} - -void Correction::initCorrectionState( - const int rootPos, const int childCount, const bool traverseAll) { - latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll); - // TODO: remove - mCorrectionStates[0].mTransposedPos = mTransposedPos; - mCorrectionStates[0].mExcessivePos = mExcessivePos; - mCorrectionStates[0].mSkipPos = mSkipPos; -} - -void Correction::setCorrectionParams(const int skipPos, const int excessivePos, - const int transposedPos, const int spaceProximityPos, const int missingSpacePos, - const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) { - // TODO: remove - mTransposedPos = transposedPos; - mExcessivePos = excessivePos; - mSkipPos = skipPos; - // TODO: remove - mCorrectionStates[0].mTransposedPos = transposedPos; - mCorrectionStates[0].mExcessivePos = excessivePos; - mCorrectionStates[0].mSkipPos = skipPos; - - mSpaceProximityPos = spaceProximityPos; - mMissingSpacePos = 
missingSpacePos; - mUseFullEditDistance = useFullEditDistance; - mDoAutoCompletion = doAutoCompletion; - mMaxErrors = maxErrors; -} - -void Correction::checkState() const { - if (DEBUG_DICT) { - int inputCount = 0; - if (mSkipPos >= 0) ++inputCount; - if (mExcessivePos >= 0) ++inputCount; - if (mTransposedPos >= 0) ++inputCount; - } -} - -bool Correction::sameAsTyped() const { - return mProximityInfoState.sameAsTyped(mWord, mOutputIndex); -} - -int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const bool isSpaceProximity, const int *word) const { - return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, - wordCount, this, isSpaceProximity, word); -} - -int Correction::getFinalProbability(const int probability, int **word, int *wordLength) { - return getFinalProbabilityInternal(probability, word, wordLength, mInputSize); -} - -int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength, - const int inputSize) { - return getFinalProbabilityInternal(probability, word, wordLength, inputSize); -} - -bool Correction::initProcessState(const int outputIndex) { - if (mCorrectionStates[outputIndex].mChildCount <= 0) { - return false; - } - mOutputIndex = outputIndex; - --(mCorrectionStates[outputIndex].mChildCount); - mInputIndex = mCorrectionStates[outputIndex].mInputIndex; - mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes; - - mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount; - mProximityCount = mCorrectionStates[outputIndex].mProximityCount; - mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount; - mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount; - mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount; - mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded; - - mTransposedPos = 
mCorrectionStates[outputIndex].mTransposedPos; - mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos; - mSkipPos = mCorrectionStates[outputIndex].mSkipPos; - - mMatching = false; - mProximityMatching = false; - mAdditionalProximityMatching = false; - mTransposing = false; - mExceeding = false; - mSkipping = false; - - return true; -} - -int Correction::goDownTree(const int parentIndex, const int childCount, const int firstChildPos) { - mCorrectionStates[mOutputIndex].mParentIndex = parentIndex; - mCorrectionStates[mOutputIndex].mChildCount = childCount; - mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos; - return mOutputIndex; -} - -// TODO: remove -int Correction::getInputIndex() const { - return mInputIndex; -} - -bool Correction::needsToPrune() const { - // TODO: use edit distance here - return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance - // Allow one char longer word for missing character - || (!mDoAutoCompletion && (mOutputIndex > mInputSize)); -} - -inline static bool isEquivalentChar(ProximityType type) { - return type == MATCH_CHAR; -} - -inline static bool isProximityCharOrEquivalentChar(ProximityType type) { - return type == MATCH_CHAR || type == PROXIMITY_CHAR; -} - -Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) { - const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); - if (correctionCount > mMaxErrors) { - return processUnrelatedCorrectionType(); - } - - // TODO: Change the limit if we'll allow two or more corrections - const bool noCorrectionsHappenedSoFar = correctionCount == 0; - const bool canTryCorrection = noCorrectionsHappenedSoFar; - int proximityIndex = 0; - mDistances[mOutputIndex] = NOT_A_DISTANCE; - - // Skip checking this node - if (mNeedsToTraverseAllNodes || isSingleQuote(c)) { - bool incremented = false; - if (mLastCharExceeded && mInputIndex == mInputSize - 1) { - // TODO: Do not check the proximity if 
EditDistance exceeds the threshold - const ProximityType matchId = mProximityInfoState.getProximityType( - mInputIndex, c, true, &proximityIndex); - if (isEquivalentChar(matchId)) { - mLastCharExceeded = false; - --mExcessiveCount; - mDistances[mOutputIndex] = - mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); - } else if (matchId == PROXIMITY_CHAR) { - mLastCharExceeded = false; - --mExcessiveCount; - ++mProximityCount; - mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance( - mInputIndex, proximityIndex); - } - if (!isSingleQuote(c)) { - incrementInputIndex(); - incremented = true; - } - } - return processSkipChar(c, isTerminal, incremented); - } - - // Check possible corrections. - if (mExcessivePos >= 0) { - if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { - mExcessivePos = mOutputIndex; - } - if (mExcessivePos < mInputSize - 1) { - mExceeding = mExcessivePos == mInputIndex && canTryCorrection; - } - } - - if (mSkipPos >= 0) { - if (mSkippedCount == 0 && mSkipPos < mOutputIndex) { - if (DEBUG_DICT) { - // TODO: Enable this assertion. 
- //ASSERT(mSkipPos == mOutputIndex - 1); - } - mSkipPos = mOutputIndex; - } - mSkipping = mSkipPos == mOutputIndex && canTryCorrection; - } - - if (mTransposedPos >= 0) { - if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) { - mTransposedPos = mOutputIndex; - } - if (mTransposedPos < mInputSize - 1) { - mTransposing = mInputIndex == mTransposedPos && canTryCorrection; - } - } - - bool secondTransposing = false; - if (mTransposedCount % 2 == 1) { - if (isEquivalentChar(mProximityInfoState.getProximityType( - mInputIndex - 1, c, false))) { - ++mTransposedCount; - secondTransposing = true; - } else if (mCorrectionStates[mOutputIndex].mExceeding) { - --mTransposedCount; - ++mExcessiveCount; - --mExcessivePos; - incrementInputIndex(); - } else { - --mTransposedCount; - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - DUMP_WORD(mWord, mOutputIndex); - AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - return processUnrelatedCorrectionType(); - } - } - - // TODO: Change the limit if we'll allow two or more proximity chars with corrections - // Work around: When the mMaxErrors is 1, we only allow just one error - // including proximity correction. - const bool checkProximityChars = (mMaxErrors > 1) - ? (noCorrectionsHappenedSoFar || mProximityCount == 0) - : (noCorrectionsHappenedSoFar && mProximityCount == 0); - - ProximityType matchedProximityCharId = secondTransposing - ? 
MATCH_CHAR - : mProximityInfoState.getProximityType( - mInputIndex, c, checkProximityChars, &proximityIndex); - - if (SUBSTITUTION_CHAR == matchedProximityCharId - || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { - if (canTryCorrection && mOutputIndex > 0 - && mCorrectionStates[mOutputIndex].mProximityMatching - && mCorrectionStates[mOutputIndex].mExceeding - && isEquivalentChar(mProximityInfoState.getProximityType( - mInputIndex, mWord[mOutputIndex - 1], false))) { - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]); - } - // Conversion p->e - // Example: - // wearth -> earth - // px -> (E)mmmmm - ++mExcessiveCount; - --mProximityCount; - mExcessivePos = mOutputIndex - 1; - ++mInputIndex; - // Here, we are doing something equivalent to matchedProximityCharId, - // but we already know that "excessive char correction" just happened - // so that we just need to check "mProximityCount == 0". - matchedProximityCharId = mProximityInfoState.getProximityType( - mInputIndex, c, mProximityCount == 0, &proximityIndex); - } - } - - if (SUBSTITUTION_CHAR == matchedProximityCharId - || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { - if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { - mAdditionalProximityMatching = true; - } - // TODO: Optimize - // As the current char turned out to be an unrelated char, - // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] - // here refers to the previous state. 
- if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0 - && !mCorrectionStates[mOutputIndex].mTransposing - && mCorrectionStates[mOutputIndex - 1].mTransposing - && isEquivalentChar(mProximityInfoState.getProximityType( - mInputIndex, mWord[mOutputIndex - 1], false)) - && isEquivalentChar( - mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) { - // Conversion t->e - // Example: - // occaisional -> occa sional - // mmmmttx -> mmmm(E)mmmmmm - mTransposedCount -= 2; - ++mExcessiveCount; - ++mInputIndex; - } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0 - && !mCorrectionStates[mOutputIndex].mTransposing - && mCorrectionStates[mOutputIndex - 1].mTransposing - && isEquivalentChar( - mProximityInfoState.getProximityType(mInputIndex - 1, c, false))) { - // Conversion t->s - // Example: - // chcolate -> chocolate - // mmttx -> mmsmmmmmm - mTransposedCount -= 2; - ++mSkippedCount; - --mInputIndex; - } else if (canTryCorrection && mInputIndex > 0 - && mCorrectionStates[mOutputIndex].mProximityMatching - && mCorrectionStates[mOutputIndex].mSkipping - && isEquivalentChar( - mProximityInfoState.getProximityType(mInputIndex - 1, c, false))) { - // Conversion p->s - // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of - // proximity chars of "s", but it should rather be handled as a skipped char. 
- ++mSkippedCount; - --mProximityCount; - return processSkipChar(c, isTerminal, false); - } else if (mInputIndex - 1 < mInputSize - && mSkippedCount > 0 - && mCorrectionStates[mOutputIndex].mSkipping - && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching - && isProximityCharOrEquivalentChar( - mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) { - // Conversion s->a - incrementInputIndex(); - --mSkippedCount; - mProximityMatching = true; - ++mProximityCount; - mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; - } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize - && isEquivalentChar( - mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) { - // 1.2. Excessive or transpose correction - if (mTransposing) { - ++mTransposedCount; - } else { - ++mExcessiveCount; - incrementInputIndex(); - } - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - DUMP_WORD(mWord, mOutputIndex); - if (mTransposing) { - AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } else { - AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - } - } else if (mSkipping) { - // 3. 
Skip correction - ++mSkippedCount; - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - return processSkipChar(c, isTerminal, false); - } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { - // As a last resort, use additional proximity characters - mProximityMatching = true; - ++mProximityCount; - mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - } else { - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - DUMP_WORD(mWord, mOutputIndex); - AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - return processUnrelatedCorrectionType(); - } - } else if (secondTransposing) { - // If inputIndex is greater than mInputSize, that means there is no - // proximity chars. So, we don't need to check proximity. 
- mMatching = true; - } else if (isEquivalentChar(matchedProximityCharId)) { - mMatching = true; - ++mEquivalentCharCount; - mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); - } else if (PROXIMITY_CHAR == matchedProximityCharId) { - mProximityMatching = true; - ++mProximityCount; - mDistances[mOutputIndex] = - mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex); - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 - || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - } - - addCharToCurrentWord(c); - - // 4. Last char excessive correction - mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0 - && mProximityCount == 0 && (mInputIndex == mInputSize - 2); - const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded; - if (mLastCharExceeded) { - ++mExcessiveCount; - } - - // Start traversing all nodes after the index exceeds the user typed length - if (isSameAsUserTypedLength) { - startToTraverseAllNodes(); - } - - const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar = - mExceeding && mInputIndex == mInputSize - 2; - - // Finally, we are ready to go to the next character, the next "virtual node". - // We should advance the input index. - // We do this in this branch of the 'if traverseAllNodes' because we are still matching - // characters to input; the other branch is not matching them but searching for - // completions, this is why it does not have to do it. - incrementInputIndex(); - // Also, the next char is one "virtual node" depth more than this char. 
- incrementOutputIndex(); - - if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar - || isSameAsUserTypedLength) && isTerminal) { - mTerminalInputIndex = mInputIndex - 1; - mTerminalOutputIndex = mOutputIndex - 1; - if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) - && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { - DUMP_WORD(mWord, mOutputIndex); - AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, - mTransposedCount, mExcessiveCount, c); - } - return ON_TERMINAL; - } else { - mTerminalInputIndex = mInputIndex - 1; - mTerminalOutputIndex = mOutputIndex - 1; - return NOT_ON_TERMINAL; - } -} - -inline static int getQuoteCount(const int *word, const int length) { - int quoteCount = 0; - for (int i = 0; i < length; ++i) { - if (word[i] == KEYCODE_SINGLE_QUOTE) { - ++quoteCount; - } - } - return quoteCount; -} - -inline static bool isUpperCase(unsigned short c) { - return CharUtils::isAsciiUpper(CharUtils::toBaseCodePoint(c)); -} - -////////////////////// -// RankingAlgorithm // -////////////////////// - -/* static */ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, - const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction, - const int inputSize) { - const int excessivePos = correction->getExcessivePos(); - const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; - const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; - const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState; - const int skippedCount = correction->mSkippedCount; - const int transposedCount = correction->mTransposedCount / 2; - const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; - const int proximityMatchedCount = correction->mProximityCount; - const bool lastCharExceeded = correction->mLastCharExceeded; - const bool useFullEditDistance = 
correction->mUseFullEditDistance; - const int outputLength = outputIndex + 1; - if (skippedCount >= inputSize || inputSize == 0) { - return -1; - } - - // TODO: find more robust way - bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2) - : (inputSize == inputIndex + 1); - - // TODO: use mExcessiveCount - const int matchCount = inputSize - correction->mProximityCount - excessiveCount; - - const int *word = correction->mWord; - const bool skipped = skippedCount > 0; - - const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) - - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize)); - - // TODO: Calculate edit distance for transposed and excessive - int ed = 0; - if (DEBUG_DICT_FULL) { - dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength); - } - int adjustedProximityMatchedCount = proximityMatchedCount; - - int finalFreq = freq; - - if (DEBUG_CORRECTION_FREQ - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) { - AKLOGI("FinalFreq0: %d", finalFreq); - } - // TODO: Optimize this. - if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { - ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength, - inputSize) - transposedCount; - - const int matchWeight = powerIntCapped(typedLetterMultiplier, - max(inputSize, outputLength) - ed); - multiplyIntCapped(matchWeight, &finalFreq); - - // TODO: Demote further if there are two or more excessive chars with longer user input? 
- if (inputSize > outputLength) { - multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); - } - - ed = max(0, ed - quoteDiffCount); - adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)), - proximityMatchedCount); - if (transposedCount <= 0) { - if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) { - // Promote a word with just one skipped or excessive char - if (sameLength) { - multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE - + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength, - &finalFreq); - } else { - multiplyIntCapped(typedLetterMultiplier, &finalFreq); - } - } else if (ed == 0) { - multiplyIntCapped(typedLetterMultiplier, &finalFreq); - sameLength = true; - } - } - } else { - const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); - multiplyIntCapped(matchWeight, &finalFreq); - } - - if (proximityInfoState->getProximityType(0, word[0], true) == SUBSTITUTION_CHAR) { - multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); - } - - /////////////////////////////////////////////// - // Promotion and Demotion for each correction - - // Demotion for a word with missing character - if (skipped) { - const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE - * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) - / (10 * inputSize - - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10); - if (DEBUG_DICT_FULL) { - AKLOGI("Demotion rate for missing character is %d.", demotionRate); - } - multiplyRate(demotionRate, &finalFreq); - } - - // Demotion for a word with transposed character - if (transposedCount > 0) multiplyRate( - WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); - - // Demotion for a word with excessive character - if (excessiveCount > 0) { - multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); - if (!lastCharExceeded && 
!proximityInfoState->existsAdjacentProximityChars(excessivePos)) { - if (DEBUG_DICT_FULL) { - AKLOGI("Double excessive demotion"); - } - // If an excessive character is not adjacent to the left char or the right char, - // we will demote this word. - multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); - } - } - - int additionalProximityCount = 0; - // Demote additional proximity characters - for (int i = 0; i < outputLength; ++i) { - const int squaredDistance = correction->mDistances[i]; - if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { - ++additionalProximityCount; - } - } - - const bool performTouchPositionCorrection = - CALIBRATE_SCORE_BY_TOUCH_COORDINATES - && proximityInfoState->touchPositionCorrectionEnabled() - && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0 - && additionalProximityCount == 0; - - // Score calibration by touch coordinates is being done only for pure-fat finger typing error - // cases. - // TODO: Remove this constraint. 
- if (performTouchPositionCorrection) { - for (int i = 0; i < outputLength; ++i) { - const int squaredDistance = correction->mDistances[i]; - if (i < adjustedProximityMatchedCount) { - multiplyIntCapped(typedLetterMultiplier, &finalFreq); - } - const float factor = TouchPositionCorrectionUtils::getLengthScalingFactor( - static_cast(squaredDistance)); - if (factor > 0.0f) { - multiplyRate(static_cast(factor * 100.0f), &finalFreq); - } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { - multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); - } - } - } else { - // Promotion for a word with proximity characters - for (int i = 0; i < adjustedProximityMatchedCount; ++i) { - // A word with proximity corrections - if (DEBUG_DICT_FULL) { - AKLOGI("Found a proximity correction."); - } - multiplyIntCapped(typedLetterMultiplier, &finalFreq); - if (i < additionalProximityCount) { - multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); - } else { - multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); - } - } - } - - // If the user types too many(three or more) proximity characters with additional proximity - // character,do not treat as the same length word. - if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3 - || transposedCount > 0 || skipped || excessiveCount > 0)) { - sameLength = false; - } - - const int errorCount = adjustedProximityMatchedCount > 0 - ? 
adjustedProximityMatchedCount - : (proximityMatchedCount + transposedCount); - multiplyRate( - 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq); - - // Promotion for an exactly matched word - if (ed == 0) { - // Full exact match - if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 - && quoteDiffCount == 0 && additionalProximityCount == 0) { - finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); - } - } - - // Promote a word with no correction - if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0 - && additionalProximityCount == 0) { - multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); - } - - // TODO: Check excessive count and transposed count - // TODO: Remove this if possible - /* - If the last character of the user input word is the same as the next character - of the output word, and also all of characters of the user input are matched - to the output word, we'll promote that word a bit because - that word can be considered the combination of skipped and matched characters. - This means that the 'sm' pattern wins over the 'ma' pattern. - e.g.) - shel -> shell [mmmma] or [mmmsm] - hel -> hello [mmmaa] or [mmsma] - m ... matching - s ... skipping - a ... traversing all - t ... transposing - e ... exceeding - p ... proximity matching - */ - if (matchCount == inputSize && matchCount >= 2 && !skipped - && word[matchCount] == word[matchCount - 1]) { - multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); - } - - // TODO: Do not use sameLength? - if (sameLength) { - multiplyIntCapped(fullWordMultiplier, &finalFreq); - } - - if (useFullEditDistance && outputLength > inputSize + 1) { - const int diff = outputLength - inputSize - 1; - const int divider = diff < 31 ? 1 << diff : S_INT_MAX; - finalFreq = divider > finalFreq ? 
1 : finalFreq / divider; - } - - if (DEBUG_DICT_FULL) { - AKLOGI("calc: %d, %d", outputLength, sameLength); - } - - if (DEBUG_CORRECTION_FREQ - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) { - DUMP_WORD(correction->getPrimaryInputWord(), inputSize); - DUMP_WORD(correction->mWord, outputLength); - AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, - skippedCount, transposedCount, excessiveCount, additionalProximityCount, - outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq); - } - - return finalFreq; -} - -/* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(const int *freqArray, - const int *wordLengthArray, const int wordCount, const Correction *correction, - const bool isSpaceProximity, const int *word) { - const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; - - bool firstCapitalizedWordDemotion = false; - bool secondCapitalizedWordDemotion = false; - - { - // TODO: Handle multiple capitalized word demotion properly - const int firstWordLength = wordLengthArray[0]; - const int secondWordLength = wordLengthArray[1]; - if (firstWordLength >= 2) { - firstCapitalizedWordDemotion = isUpperCase(word[0]); - } - - if (secondWordLength >= 2) { - // FIXME: word[firstWordLength + 1] is incorrect. 
- secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); - } - } - - - const bool capitalizedWordDemotion = - firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; - - int totalLength = 0; - int totalFreq = 0; - for (int i = 0; i < wordCount; ++i) { - const int wordLength = wordLengthArray[i]; - if (wordLength <= 0) { - return 0; - } - totalLength += wordLength; - const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1); - int tempFirstFreq = freqArray[i]; - multiplyRate(demotionRate, &tempFirstFreq); - totalFreq += tempFirstFreq; - } - - if (totalLength <= 0 || totalFreq <= 0) { - return 0; - } - - // TODO: Currently totalFreq is adjusted to two word metrix. - // Promote pairFreq with multiplying by 2, because the word length is the same as the typed - // length. - totalFreq = totalFreq * 2 / wordCount; - if (wordCount > 2) { - // Safety net for 3+ words -- Caveats: many heuristics and workarounds here. - int oneLengthCounter = 0; - int twoLengthCounter = 0; - for (int i = 0; i < wordCount; ++i) { - const int wordLength = wordLengthArray[i]; - // TODO: Use bigram instead of this safety net - if (i < wordCount - 1) { - const int nextWordLength = wordLengthArray[i + 1]; - if (wordLength == 1 && nextWordLength == 2) { - // Safety net to filter 1 length and 2 length sequential words - return 0; - } - } - const int freq = freqArray[i]; - // Demote too short weak words - if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) { - multiplyRate(100 * freq / MAX_PROBABILITY, &totalFreq); - } - if (wordLength == 1) { - ++oneLengthCounter; - } else if (wordLength == 2) { - ++twoLengthCounter; - } - if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) { - // Safety net to filter too many short words - return 0; - } - } - multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq); - } - - // This is a workaround to try offsetting the not-enough-demotion which will be done in - // 
calcNormalizedScore in Utils.java. - // In calcNormalizedScore the score will be demoted by (1 - 1 / length) - // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by - // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length)) - const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength); - multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq); - - // At this moment, totalFreq is calculated by the following formula: - // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1))) - // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1)) - - multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq); - - // This is another workaround to offset the demotion which will be done in - // calcNormalizedScore in Utils.java. - // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote - // the same amount because we already have adjusted the synthetic freq of this "missing or - // mistyped space" suggestion candidate above in this method. 
- const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength); - multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq); - - if (isSpaceProximity) { - // A word pair with one space proximity correction - if (DEBUG_DICT) { - AKLOGI("Found a word pair with space proximity correction."); - } - multiplyIntCapped(typedLetterMultiplier, &totalFreq); - multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); - } - - if (isSpaceProximity) { - multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq); - } else { - multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); - } - - if (capitalizedWordDemotion) { - multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); - } - - if (DEBUG_CORRECTION_FREQ) { - AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1], - wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq); - DUMP_WORD(word, wordLengthArray[0]); - } - - return totalFreq; -} - -/* static */ int Correction::RankingAlgorithm::editDistance(const int *before, - const int beforeLength, const int *after, const int afterLength) { - const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( - before, beforeLength, after, afterLength); - return static_cast(EditDistance::getEditDistance(&daemaruLevenshtein)); -} - - -// In dictionary.cpp, getSuggestion() method, -// When USE_SUGGEST_INTERFACE_FOR_TYPING is true: -// -// // TODO: Revise the following logic thoroughly by referring to the logic -// // marked as "Otherwise" below. -// SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert -// them to integers. -// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE) -// Undo the scaling here to recover the original score. -// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE -// -// Otherwise: suggestion scores are computed using the below formula. 
-// original score -// := powf(mTypedLetterMultiplier (this is defined 2), -// (the number of matched characters between typed word and suggested word)) -// * (individual word's score which defined in the unigram dictionary, -// and this score is defined in range [0, 255].) -// Then, the following processing is applied. -// - If the dictionary word is matched up to the point of the user entry -// (full match up to min(before.length(), after.length()) -// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) -// - If the word is a true full match except for differences in accents or -// capitalization, then treat it as if the score was 255. -// - If before.length() == after.length() -// => multiply by mFullWordMultiplier (this is defined 2)) -// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2 -// For historical reasons we ignore the 1.2 modifier (because the measure for a good -// autocorrection threshold was done at a time when it didn't exist). This doesn't change -// the result. -// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. - -/* static */ float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, - const int beforeLength, const int *after, const int afterLength, const int score) { - if (0 == beforeLength || 0 == afterLength) { - return 0.0f; - } - const int distance = editDistance(before, beforeLength, after, afterLength); - int spaceCount = 0; - for (int i = 0; i < afterLength; ++i) { - if (after[i] == KEYCODE_SPACE) { - ++spaceCount; - } - } - - if (spaceCount == afterLength) { - return 0.0f; - } - - // add a weight based on edit distance. - // distance <= max(afterLength, beforeLength) == afterLength, - // so, 0 <= distance / afterLength <= 1 - const float weight = 1.0f - static_cast(distance) / static_cast(afterLength); - - // TODO: Revise the following logic thoroughly by referring to... 
- if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) { - return (static_cast(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight; - } - // ...this logic. - const float maxScore = score >= S_INT_MAX ? static_cast(S_INT_MAX) - : static_cast(MAX_INITIAL_SCORE) - * powf(static_cast(TYPED_LETTER_MULTIPLIER), - static_cast(min(beforeLength, afterLength - spaceCount))) - * static_cast(FULL_WORD_MULTIPLIER); - - return (static_cast(score) / maxScore) * weight; -} -} // namespace latinime diff --git a/native/jni/src/obsolete/correction.h b/native/jni/src/obsolete/correction.h deleted file mode 100644 index 47dcef2d7..000000000 --- a/native/jni/src/obsolete/correction.h +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_CORRECTION_H -#define LATINIME_CORRECTION_H - -#include // for memset() - -#include "defines.h" -#include "obsolete/correction_state.h" -#include "suggest/core/layout/proximity_info_state.h" -#include "utils/char_utils.h" - -namespace latinime { - -class ProximityInfo; - -class Correction { - public: - typedef enum { - TRAVERSE_ALL_ON_TERMINAL, - TRAVERSE_ALL_NOT_ON_TERMINAL, - UNRELATED, - ON_TERMINAL, - NOT_ON_TERMINAL - } CorrectionType; - - Correction() - : mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false), - mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0), - mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0), - mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0), - mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0), - mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0), - mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false), - mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false), - mSkipping(false), mProximityInfoState() { - memset(mWord, 0, sizeof(mWord)); - memset(mDistances, 0, sizeof(mDistances)); - memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable)); - // NOTE: mCorrectionStates is an array of instances. - // No need to initialize it explicitly here. 
- } - - // Non virtual inline destructor -- never inherit this class - ~Correction() {} - void resetCorrection(); - void initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth); - void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); - - // TODO: remove - void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, - const bool doAutoCompletion, const int maxErrors); - void checkState() const; - bool sameAsTyped() const; - bool initProcessState(const int index); - - int getInputIndex() const; - - bool needsToPrune() const; - - int pushAndGetTotalTraverseCount() { - return ++mTotalTraverseCount; - } - - int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const bool isSpaceProximity, const int *word) const; - int getFinalProbability(const int probability, int **word, int *wordLength); - int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength, - const int inputSize); - - CorrectionType processCharAndCalcState(const int c, const bool isTerminal); - - ///////////////////////// - // Tree helper methods - int goDownTree(const int parentIndex, const int childCount, const int firstChildPos); - - inline int getTreeSiblingPos(const int index) const { - return mCorrectionStates[index].mSiblingPos; - } - - inline void setTreeSiblingPos(const int index, const int pos) { - mCorrectionStates[index].mSiblingPos = pos; - } - - inline int getTreeParentIndex(const int index) const { - return mCorrectionStates[index].mParentIndex; - } - - class RankingAlgorithm { - public: - static int calculateFinalProbability(const int inputIndex, const int depth, - const int probability, int *editDistanceTable, const Correction *correction, - const int inputSize); - static int calcFreqForSplitMultipleWords(const int *freqArray, const 
int *wordLengthArray, - const int wordCount, const Correction *correction, const bool isSpaceProximity, - const int *word); - static float calcNormalizedScore(const int *before, const int beforeLength, - const int *after, const int afterLength, const int score); - static int editDistance(const int *before, const int beforeLength, const int *after, - const int afterLength); - private: - static const int MAX_INITIAL_SCORE = 255; - }; - - // proximity info state - void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes, - const int inputSize, const int *xCoordinates, const int *yCoordinates) { - mProximityInfoState.initInputParams(0, static_cast(MAX_VALUE_FOR_WEIGHTING), - proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); - } - - const int *getPrimaryInputWord() const { - return mProximityInfoState.getPrimaryInputWord(); - } - - int getPrimaryCodePointAt(const int index) const { - return mProximityInfoState.getPrimaryCodePointAt(index); - } - - private: - DISALLOW_COPY_AND_ASSIGN(Correction); - - // The following "rate"s are used as a multiplier before dividing by 100, so they are in - // percent. 
- static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE; - static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X; - static const int WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE; - static const int WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE; - static const int WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE; - static const int WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE; - static const int WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE; - static const int FULL_MATCHED_WORDS_PROMOTION_RATE; - static const int WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE; - static const int WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE; - static const int WORDS_WITH_MATCH_SKIP_PROMOTION_RATE; - static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE; - static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER; - static const int CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE; - static const int INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE; - static const int FIRST_CHAR_DIFFERENT_DEMOTION_RATE; - static const int TWO_WORDS_CAPITALIZED_DEMOTION_RATE; - static const int TWO_WORDS_CORRECTION_DEMOTION_BASE; - - ///////////////////////// - // static inline utils // - ///////////////////////// - static const int TWO_31ST_DIV_255 = S_INT_MAX / 255; - static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) { - return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX); - } - - static const int TWO_31ST_DIV_2 = S_INT_MAX / 2; - AK_FORCE_INLINE static void multiplyIntCapped(const int multiplier, int *base) { - const int temp = *base; - if (temp != S_INT_MAX) { - // Branch if multiplier == 2 for the optimization - if (multiplier < 0) { - if (DEBUG_DICT) { - ASSERT(false); - } - AKLOGI("--- Invalid multiplier: %d", multiplier); - } else if (multiplier == 0) { - *base = 0; - } else if (multiplier == 2) { - *base = TWO_31ST_DIV_2 >= temp ? 
temp << 1 : S_INT_MAX; - } else { - // TODO: This overflow check gives a wrong answer when, for example, - // temp = 2^16 + 1 and multiplier = 2^17 + 1. - // Fix this behavior. - const int tempRetval = temp * multiplier; - *base = tempRetval >= temp ? tempRetval : S_INT_MAX; - } - } - } - - AK_FORCE_INLINE static int powerIntCapped(const int base, const int n) { - if (n <= 0) return 1; - if (base == 2) { - return n < 31 ? 1 << n : S_INT_MAX; - } - int ret = base; - for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); - return ret; - } - - AK_FORCE_INLINE static void multiplyRate(const int rate, int *freq) { - if (*freq != S_INT_MAX) { - if (*freq > 1000000) { - *freq /= 100; - multiplyIntCapped(rate, freq); - } else { - multiplyIntCapped(rate, freq); - *freq /= 100; - } - } - } - - inline int getSpaceProximityPos() const { - return mSpaceProximityPos; - } - inline int getMissingSpacePos() const { - return mMissingSpacePos; - } - - inline int getSkipPos() const { - return mSkipPos; - } - - inline int getExcessivePos() const { - return mExcessivePos; - } - - inline int getTransposedPos() const { - return mTransposedPos; - } - - inline void incrementInputIndex(); - inline void incrementOutputIndex(); - inline void startToTraverseAllNodes(); - inline bool isSingleQuote(const int c); - inline CorrectionType processSkipChar(const int c, const bool isTerminal, - const bool inputIndexIncremented); - inline CorrectionType processUnrelatedCorrectionType(); - inline void addCharToCurrentWord(const int c); - inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength, - const int inputSize); - - static const int TYPED_LETTER_MULTIPLIER = 2; - static const int FULL_WORD_MULTIPLIER = 2; - const ProximityInfo *mProximityInfo; - - bool mUseFullEditDistance; - bool mDoAutoCompletion; - int mMaxEditDistance; - int mMaxDepth; - int mInputSize; - int mSpaceProximityPos; - int mMissingSpacePos; - int mTerminalInputIndex; - int mTerminalOutputIndex; - 
int mMaxErrors; - - int mTotalTraverseCount; - - // The following arrays are state buffer. - int mWord[MAX_WORD_LENGTH]; - int mDistances[MAX_WORD_LENGTH]; - - // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N. - // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot. - int mEditDistanceTable[(MAX_WORD_LENGTH + 1) * (MAX_WORD_LENGTH + 1)]; - - CorrectionState mCorrectionStates[MAX_WORD_LENGTH]; - - // The following member variables are being used as cache values of the correction state. - bool mNeedsToTraverseAllNodes; - int mOutputIndex; - int mInputIndex; - - int mEquivalentCharCount; - int mProximityCount; - int mExcessiveCount; - int mTransposedCount; - int mSkippedCount; - - int mTransposedPos; - int mExcessivePos; - int mSkipPos; - - bool mLastCharExceeded; - - bool mMatching; - bool mProximityMatching; - bool mAdditionalProximityMatching; - bool mExceeding; - bool mTransposing; - bool mSkipping; - ProximityInfoState mProximityInfoState; -}; - -inline void Correction::incrementInputIndex() { - ++mInputIndex; -} - -AK_FORCE_INLINE void Correction::incrementOutputIndex() { - ++mOutputIndex; - mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; - mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; - mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; - mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; - mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; - - mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount; - mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount; - mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount; - mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount; - mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; - - 
mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; - mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos; - mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos; - - mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded; - - mCorrectionStates[mOutputIndex].mMatching = mMatching; - mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; - mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; - mCorrectionStates[mOutputIndex].mTransposing = mTransposing; - mCorrectionStates[mOutputIndex].mExceeding = mExceeding; - mCorrectionStates[mOutputIndex].mSkipping = mSkipping; -} - -inline void Correction::startToTraverseAllNodes() { - mNeedsToTraverseAllNodes = true; -} - -AK_FORCE_INLINE bool Correction::isSingleQuote(const int c) { - const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex); - return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE); -} - -AK_FORCE_INLINE Correction::CorrectionType Correction::processSkipChar(const int c, - const bool isTerminal, const bool inputIndexIncremented) { - addCharToCurrentWord(c); - mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); - mTerminalOutputIndex = mOutputIndex; - incrementOutputIndex(); - if (mNeedsToTraverseAllNodes && isTerminal) { - return TRAVERSE_ALL_ON_TERMINAL; - } - return TRAVERSE_ALL_NOT_ON_TERMINAL; -} - -inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() { - // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType - mTerminalInputIndex = mInputIndex; - mTerminalOutputIndex = mOutputIndex; - return UNRELATED; -} - -AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input, - const int inputSize, const int *output, const int outputLength) { - // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH] is not touched. 
- // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. - // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, - // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize]. - int *const current = editDistanceTable + outputLength * (inputSize + 1); - const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1); - const int *const prevprev = - outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; - current[0] = outputLength; - const int co = CharUtils::toBaseLowerCase(output[outputLength - 1]); - const int prevCO = outputLength >= 2 ? CharUtils::toBaseLowerCase(output[outputLength - 2]) : 0; - for (int i = 1; i <= inputSize; ++i) { - const int ci = CharUtils::toBaseLowerCase(input[i - 1]); - const int cost = (ci == co) ? 0 : 1; - current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); - if (i >= 2 && prevprev && ci == prevCO && co == CharUtils::toBaseLowerCase(input[i - 2])) { - current[i] = min(current[i], prevprev[i - 2] + 1); - } - } -} - -AK_FORCE_INLINE void Correction::addCharToCurrentWord(const int c) { - mWord[mOutputIndex] = c; - const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); - calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord, - mOutputIndex + 1); -} - -inline int Correction::getFinalProbabilityInternal(const int probability, int **word, - int *wordLength, const int inputSize) { - const int outputIndex = mTerminalOutputIndex; - const int inputIndex = mTerminalInputIndex; - *wordLength = outputIndex + 1; - *word = mWord; - int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( - inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize); - return finalProbability; -} - -} // namespace latinime -#endif // LATINIME_CORRECTION_H diff --git a/native/jni/src/obsolete/correction_state.h b/native/jni/src/obsolete/correction_state.h deleted file mode 
100644 index a63d4aa94..000000000 --- a/native/jni/src/obsolete/correction_state.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_CORRECTION_STATE_H -#define LATINIME_CORRECTION_STATE_H - -#include - -#include "defines.h" - -namespace latinime { - -struct CorrectionState { - int mParentIndex; - int mSiblingPos; - uint16_t mChildCount; - uint8_t mInputIndex; - - uint8_t mEquivalentCharCount; - uint8_t mProximityCount; - uint8_t mTransposedCount; - uint8_t mExcessiveCount; - uint8_t mSkippedCount; - - int8_t mTransposedPos; - int8_t mExcessivePos; - int8_t mSkipPos; // should be signed - - // TODO: int? 
- bool mLastCharExceeded; - - bool mMatching; - bool mTransposing; - bool mExceeding; - bool mSkipping; - bool mProximityMatching; - bool mAdditionalProximityMatching; - - bool mNeedsToTraverseAllNodes; -}; - -inline static void initCorrectionState(CorrectionState *state, const int rootPos, - const uint16_t childCount, const bool traverseAll) { - state->mParentIndex = -1; - state->mChildCount = childCount; - state->mInputIndex = 0; - state->mSiblingPos = rootPos; - state->mNeedsToTraverseAllNodes = traverseAll; - - state->mTransposedPos = -1; - state->mExcessivePos = -1; - state->mSkipPos = -1; - - state->mEquivalentCharCount = 0; - state->mProximityCount = 0; - state->mTransposedCount = 0; - state->mExcessiveCount = 0; - state->mSkippedCount = 0; - - state->mLastCharExceeded = false; - - state->mMatching = false; - state->mProximityMatching = false; - state->mTransposing = false; - state->mExceeding = false; - state->mSkipping = false; - state->mAdditionalProximityMatching = false; -} -} // namespace latinime -#endif // LATINIME_CORRECTION_STATE_H diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 59d1b19b6..53e2df62d 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -23,7 +23,6 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" -#include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/probability_utils.h" #include "utils/char_utils.h" @@ -170,30 +169,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in return pos; } -void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, - const int prevWordLength, std::map *map, uint8_t *filter) const { - memset(filter, 0, 
BIGRAM_FILTER_BYTE_SIZE); - const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); - int pos = getBigramListPositionForWord(prevWord, prevWordLength, - false /* forceLowerCaseSearch */); - if (0 == pos) { - // If no bigrams for this exact string, search again in lower case. - pos = getBigramListPositionForWord(prevWord, prevWordLength, - true /* forceLowerCaseSearch */); - } - if (0 == pos) return; - - uint8_t bigramFlags; - do { - bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - const int probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags; - const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, - &pos); - (*map)[bigramPos] = probability; - setInFilter(filter, bigramPos); - } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); -} - bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. 
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 8b7a253a2..06d0e9da3 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -17,9 +17,6 @@ #ifndef LATINIME_BIGRAM_DICTIONARY_H #define LATINIME_BIGRAM_DICTIONARY_H -#include -#include - #include "defines.h" namespace latinime { @@ -32,10 +29,9 @@ class BigramDictionary { int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; - void fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, const int prevWordLength, - std::map *map, uint8_t *filter) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h index f580bdad5..0a290d80a 100644 --- a/native/jni/src/suggest/core/dictionary/binary_format.h +++ b/native/jni/src/suggest/core/dictionary/binary_format.h @@ -19,7 +19,6 @@ #include -#include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/probability_utils.h" #include "utils/char_utils.h" #include "utils/hash_map_compat.h" diff --git a/native/jni/src/suggest/core/dictionary/probability_utils.h b/native/jni/src/suggest/core/dictionary/probability_utils.h index 14d2f8436..f450087d8 100644 --- a/native/jni/src/suggest/core/dictionary/probability_utils.h +++ b/native/jni/src/suggest/core/dictionary/probability_utils.h @@ -17,7 +17,6 @@ #ifndef LATINIME_PROBABILITY_UTILS_H #define LATINIME_PROBABILITY_UTILS_H -#include #include #include "defines.h" @@ -49,24 +48,6 @@ class ProbabilityUtils { + static_cast(static_cast(bigramProbability + 1) * stepSize); } - // This returns a probability in log 
space. - static AK_FORCE_INLINE int getProbability(const int position, - const std::map *const bigramMap, - const uint8_t *bigramFilter, const int unigramProbability) { - if (!bigramMap || !bigramFilter) { - return backoff(unigramProbability); - } - if (!isInFilter(bigramFilter, position)){ - return backoff(unigramProbability); - } - const std::map::const_iterator bigramProbabilityIt = bigramMap->find(position); - if (bigramProbabilityIt != bigramMap->end()) { - const int bigramProbability = bigramProbabilityIt->second; - return computeProbabilityForBigram(unigramProbability, bigramProbability); - } - return backoff(unigramProbability); - } - private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityUtils); }; diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h index 6ca2fdd7b..534c2c217 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.h +++ b/native/jni/src/suggest/core/layout/proximity_info.h @@ -24,8 +24,6 @@ namespace latinime { -class Correction; - class ProximityInfo { public: ProximityInfo(JNIEnv *env, const jstring localeJStr, @@ -41,7 +39,6 @@ class ProximityInfo { float getNormalizedSquaredDistanceFromCenterFloatG( const int keyId, const int x, const int y, const float verticalScale) const; - bool sameAsTyped(const unsigned short *word, int length) const; int getCodePointOf(const int keyIndex) const; bool hasSweetSpotData(const int keyIndex) const { // When there are no calibration data for a key, @@ -95,8 +92,6 @@ class ProximityInfo { DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); void initializeG(); - float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const; - bool hasInputCoordinates() const; const int GRID_WIDTH; const int GRID_HEIGHT; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index 4e53992d4..e8d950060 100644 --- 
a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -156,11 +156,6 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi if (!isGeometric && pointerId == 0) { ProximityInfoStateUtils::initPrimaryInputWord( inputSize, mInputProximities, mPrimaryInputWord); - if (mTouchPositionCorrectionEnabled) { - ProximityInfoStateUtils::initNormalizedSquaredDistances( - mProximityInfo, inputSize, xCoordinates, yCoordinates, mInputProximities, - &mSampledInputXs, &mSampledInputYs, mNormalizedSquaredDistances); - } } if (DEBUG_GEO_FULL) { AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize); @@ -279,26 +274,6 @@ float ProximityInfoState::getDirection(const int index0, const int index1) const &mSampledInputXs, &mSampledInputYs, index0, index1); } -float ProximityInfoState::getLineToKeyDistance( - const int from, const int to, const int keyId, const bool extend) const { - if (from < 0 || from > mSampledInputSize - 1) { - return 0.0f; - } - if (to < 0 || to > mSampledInputSize - 1) { - return 0.0f; - } - const int x0 = mSampledInputXs[from]; - const int y0 = mSampledInputYs[from]; - const int x1 = mSampledInputXs[to]; - const int y1 = mSampledInputYs[to]; - - const int keyX = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); - const int keyY = mProximityInfo->getKeyCenterYOfKeyIdG(keyId); - - return ProximityInfoUtils::pointToLineSegSquaredDistanceFloat( - keyX, keyY, x0, y0, x1, y1, extend); -} - float ProximityInfoState::getMostProbableString(int *const codePointBuf) const { memcpy(codePointBuf, mMostProbableString, sizeof(mMostProbableString)); return mMostProbableStringProbability; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index 0079ab5b8..cc6410af1 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ 
b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -53,7 +53,6 @@ class ProximityInfoState { mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false), mSampledInputSize(0), mMostProbableStringProbability(0.0f) { memset(mInputProximities, 0, sizeof(mInputProximities)); - memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances)); memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); memset(mMostProbableString, 0, sizeof(mMostProbableString)); } @@ -91,6 +90,19 @@ class ProximityInfoState { return false; } + // TODO: Promote insertion letter correction if that letter is a proximity of the previous + // letter like follows: + // // Demotion for a word with excessive character + // if (excessiveCount > 0) { + // multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); + // if (!lastCharExceeded + // && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) { + // // If an excessive character is not adjacent to the left char or the right char, + // // we will demote this word. 
+ // multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, + // &finalFreq); + // } + // } inline bool existsAdjacentProximityChars(const int index) const { if (index < 0 || index >= mSampledInputSize) return false; const int currentCodePoint = getPrimaryCodePointAt(index); @@ -106,12 +118,6 @@ class ProximityInfoState { return false; } - inline int getNormalizedSquaredDistance( - const int inputIndex, const int proximityIndex) const { - return mNormalizedSquaredDistances[ - inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex]; - } - inline const int *getPrimaryInputWord() const { return mPrimaryInputWord; } @@ -190,24 +196,10 @@ class ProximityInfoState { float getProbability(const int index, const int charCode) const; - float getLineToKeyDistance( - const int from, const int to, const int keyId, const bool extend) const; - bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const; private: DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); - ///////////////////////////////////////// - // Defined in proximity_info_state.cpp // - ///////////////////////////////////////// - float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const; - - float calculateSquaredDistanceFromSweetSpotCenter( - const int keyIndex, const int inputIndex) const; - - ///////////////////////////////////////// - // Defined here // - ///////////////////////////////////////// inline const int *getProximityCodePointsAt(const int index) const { return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index); @@ -249,7 +241,6 @@ class ProximityInfoState { std::vector > mSampledSearchKeyVectors; bool mTouchPositionCorrectionEnabled; int mInputProximities[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH]; - int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH]; int mSampledInputSize; int mPrimaryInputWord[MAX_WORD_LENGTH]; float mMostProbableStringProbability; diff --git 
a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp index 6f88833a2..1bbae652c 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp @@ -181,48 +181,6 @@ namespace latinime { return squaredDistance / squaredRadius; } -/* static */ void ProximityInfoStateUtils::initNormalizedSquaredDistances( - const ProximityInfo *const proximityInfo, const int inputSize, const int *inputXCoordinates, - const int *inputYCoordinates, const int *const inputProximities, - const std::vector *const sampledInputXs, const std::vector *const sampledInputYs, - int *normalizedSquaredDistances) { - memset(normalizedSquaredDistances, NOT_A_DISTANCE, - sizeof(normalizedSquaredDistances[0]) * MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH); - const bool hasInputCoordinates = sampledInputXs->size() > 0 && sampledInputYs->size() > 0; - for (int i = 0; i < inputSize; ++i) { - const int *proximityCodePoints = getProximityCodePointsAt(inputProximities, i); - const int primaryKey = proximityCodePoints[0]; - const int x = inputXCoordinates[i]; - const int y = inputYCoordinates[i]; - if (DEBUG_PROXIMITY_CHARS) { - int a = x + y + primaryKey; - a += 0; - AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); - } - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityCodePoints[j] > 0; ++j) { - const int currentCodePoint = proximityCodePoints[j]; - const float squaredDistance = - hasInputCoordinates ? 
calculateNormalizedSquaredDistance( - proximityInfo, sampledInputXs, sampledInputYs, - proximityInfo->getKeyIndexOf(currentCodePoint), i) : - ProximityInfoParams::NOT_A_DISTANCE_FLOAT; - if (squaredDistance >= 0.0f) { - normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = - static_cast(squaredDistance - * ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); - } else { - normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = - (j == 0) ? MATCH_CHAR_WITHOUT_DISTANCE_INFO : - PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; - } - if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint); - } - } - } - -} - /* static */ void ProximityInfoStateUtils::initGeometricDistanceInfos( const ProximityInfo *const proximityInfo, const int sampledInputSize, const int lastSavedInputSize, const float verticalSweetSpotScale, diff --git a/native/jni/src/suggest/core/layout/touch_position_correction_utils.h b/native/jni/src/suggest/core/layout/touch_position_correction_utils.h index 429dcae0d..9130e87d3 100644 --- a/native/jni/src/suggest/core/layout/touch_position_correction_utils.h +++ b/native/jni/src/suggest/core/layout/touch_position_correction_utils.h @@ -23,31 +23,6 @@ namespace latinime { class TouchPositionCorrectionUtils { public: - // TODO: (OLD) Remove - static float getLengthScalingFactor(const float normalizedSquaredDistance) { - // Promote or demote the score according to the distance from the sweet spot - static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; - static const float B = 1.0f; - static const float C = 0.5f; - static const float MIN = 0.3f; - static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; - static const float R2 = HALF_SCORE_SQUARED_RADIUS; - const float x = normalizedSquaredDistance / static_cast( - ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); - const float factor = max((x < R1) - ? 
(A * (R1 - x) + B * x) / R1 - : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); - // factor is a piecewise linear function like: - // A -_ . - // ^-_ . - // B \ . - // \_ . - // C ------------. - // . - // 0 R1 R2 . - return factor; - } - static float getSweetSpotFactor(const bool isTouchPositionCorrectionEnabled, const float normalizedSquaredDistance) { // Promote or demote the score according to the distance from the sweet spot diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance.h b/native/jni/src/suggest/policyimpl/utils/edit_distance.h index cbbd66894..0871c37ce 100644 --- a/native/jni/src/suggest/policyimpl/utils/edit_distance.h +++ b/native/jni/src/suggest/policyimpl/utils/edit_distance.h @@ -62,6 +62,26 @@ class EditDistance { return dp[(beforeLength + 1) * (afterLength + 1) - 1]; } + AK_FORCE_INLINE static void dumpEditDistance10ForDebug(const float *const editDistanceTable, + const int editDistanceTableWidth, const int outputLength) { + if (DEBUG_DICT) { + AKLOGI("EditDistanceTable"); + for (int i = 0; i <= 10; ++i) { + float c[11]; + for (int j = 0; j <= 10; ++j) { + if (j < editDistanceTableWidth + 1 && i < outputLength + 1) { + c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j]; + } else { + c[j] = -1.0f; + } + } + AKLOGI("[ %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f ]", + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); + (void)c; // To suppress compiler warning + } + } + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance); }; diff --git a/native/jni/src/utils/autocorrection_threshold_utils.cpp b/native/jni/src/utils/autocorrection_threshold_utils.cpp new file mode 100644 index 000000000..3406e0f8e --- /dev/null +++ b/native/jni/src/utils/autocorrection_threshold_utils.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/autocorrection_threshold_utils.h" + +#include + +#include "defines.h" +#include "suggest/policyimpl/utils/edit_distance.h" +#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h" + +namespace latinime { + +const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255; +const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2; +const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2; + +/* static */ int AutocorrectionThresholdUtils::editDistance(const int *before, + const int beforeLength, const int *after, const int afterLength) { + const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( + before, beforeLength, after, afterLength); + return static_cast(EditDistance::getEditDistance(&daemaruLevenshtein)); +} + +// In dictionary.cpp, getSuggestion() method, +// When USE_SUGGEST_INTERFACE_FOR_TYPING is true: +// +// // TODO: Revise the following logic thoroughly by referring to the logic +// // marked as "Otherwise" below. +// The original suggestion scores were multiplied by SUGGEST_INTERFACE_OUTPUT_SCALE to convert +// them to integers. +// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE) +// Undo the scaling here to recover the original score. +// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE +// +// Otherwise: suggestion scores are computed using the formula below. 
+// original score +// := powf(mTypedLetterMultiplier (defined as 2), +// (the number of matched characters between typed word and suggested word)) +// * (the individual word's score, which is defined in the unigram dictionary +// and lies in the range [0, 255]) +// Then, the following processing is applied. +// - If the dictionary word is matched up to the point of the user entry +// (full match up to min(before.length(), after.length())) +// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (defined as 1.2) +// - If the word is a true full match except for differences in accents or +// capitalization, then treat it as if the score was 255. +// - If before.length() == after.length() +// => multiply by mFullWordMultiplier (defined as 2) +// So, the maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2 +// For historical reasons we ignore the 1.2 modifier (because the measure for a good +// autocorrection threshold was done at a time when it didn't exist). This doesn't change +// the result. +// So, we can normalize the original score by dividing it by powf(2, min(b.l(),a.l())) * 255 * 2. + +/* static */ float AutocorrectionThresholdUtils::calcNormalizedScore(const int *before, + const int beforeLength, const int *after, const int afterLength, const int score) { + if (0 == beforeLength || 0 == afterLength) { + return 0.0f; + } + const int distance = editDistance(before, beforeLength, after, afterLength); + int spaceCount = 0; + for (int i = 0; i < afterLength; ++i) { + if (after[i] == KEYCODE_SPACE) { + ++spaceCount; + } + } + + if (spaceCount == afterLength) { + return 0.0f; + } + + // add a weight based on edit distance. + // distance <= max(afterLength, beforeLength) == afterLength, + // so, 0 <= distance / afterLength <= 1 + const float weight = 1.0f - static_cast(distance) / static_cast(afterLength); + + // TODO: Revise the following logic thoroughly by referring to... 
+ if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) { + return (static_cast(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight; + } + // ...this logic. + const float maxScore = score >= S_INT_MAX ? static_cast(S_INT_MAX) + : static_cast(MAX_INITIAL_SCORE) + * powf(static_cast(TYPED_LETTER_MULTIPLIER), + static_cast(min(beforeLength, afterLength - spaceCount))) + * static_cast(FULL_WORD_MULTIPLIER); + + return (static_cast(score) / maxScore) * weight; +} + +} // namespace latinime diff --git a/native/jni/src/utils/autocorrection_threshold_utils.h b/native/jni/src/utils/autocorrection_threshold_utils.h new file mode 100644 index 000000000..c7537a6a5 --- /dev/null +++ b/native/jni/src/utils/autocorrection_threshold_utils.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H +#define LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H + +#include "defines.h" + +namespace latinime { + +class AutocorrectionThresholdUtils { + public: + static float calcNormalizedScore(const int *before, const int beforeLength, + const int *after, const int afterLength, const int score); + static int editDistance(const int *before, const int beforeLength, const int *after, + const int afterLength); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(AutocorrectionThresholdUtils); + + static const int MAX_INITIAL_SCORE; + static const int TYPED_LETTER_MULTIPLIER; + static const int FULL_WORD_MULTIPLIER; +}; +} // namespace latinime +#endif // LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H