am 527c1283: Remove unused methods.
* commit '527c128309da708d0fdaf7928da833320d1754e9': Remove unused methods.
main
commit
675507766a
|
@ -47,8 +47,6 @@ LATIN_IME_JNI_SRC_FILES := \
|
|||
|
||||
LATIN_IME_CORE_SRC_FILES := \
|
||||
suggest/core/suggest.cpp \
|
||||
$(addprefix obsolete/, \
|
||||
correction.cpp) \
|
||||
$(addprefix suggest/core/dicnode/, \
|
||||
dic_node.cpp \
|
||||
dic_node_utils.cpp \
|
||||
|
@ -76,7 +74,9 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
typing_suggest_policy.cpp \
|
||||
typing_traversal.cpp \
|
||||
typing_weighting.cpp) \
|
||||
utils/char_utils.cpp
|
||||
$(addprefix utils/, \
|
||||
char_utils.cpp \
|
||||
ranking_algorithm.cpp)
|
||||
|
||||
LOCAL_SRC_FILES := \
|
||||
$(LATIN_IME_JNI_SRC_FILES) \
|
||||
|
|
|
@ -22,15 +22,16 @@
|
|||
#include <cstring> // for memset()
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "jni_common.h"
|
||||
#include "obsolete/correction.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/dictionary.h"
|
||||
#include "suggest/core/suggest_options.h"
|
||||
#include "utils/autocorrection_threshold_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -201,7 +202,7 @@ static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass
|
|||
int afterCodePoints[afterLength];
|
||||
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
|
||||
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
|
||||
return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
|
||||
return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength,
|
||||
afterCodePoints, afterLength, score);
|
||||
}
|
||||
|
||||
|
@ -213,7 +214,7 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji
|
|||
int afterCodePoints[afterLength];
|
||||
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
|
||||
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
|
||||
return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
|
||||
return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength,
|
||||
afterCodePoints, afterLength);
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,398 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2011 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_CORRECTION_H
|
||||
#define LATINIME_CORRECTION_H
|
||||
|
||||
#include <cstring> // for memset()
|
||||
|
||||
#include "defines.h"
|
||||
#include "obsolete/correction_state.h"
|
||||
#include "suggest/core/layout/proximity_info_state.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class ProximityInfo;
|
||||
|
||||
class Correction {
|
||||
public:
|
||||
typedef enum {
|
||||
TRAVERSE_ALL_ON_TERMINAL,
|
||||
TRAVERSE_ALL_NOT_ON_TERMINAL,
|
||||
UNRELATED,
|
||||
ON_TERMINAL,
|
||||
NOT_ON_TERMINAL
|
||||
} CorrectionType;
|
||||
|
||||
Correction()
|
||||
: mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false),
|
||||
mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0),
|
||||
mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0),
|
||||
mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0),
|
||||
mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0),
|
||||
mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0),
|
||||
mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false),
|
||||
mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false),
|
||||
mSkipping(false), mProximityInfoState() {
|
||||
memset(mWord, 0, sizeof(mWord));
|
||||
memset(mDistances, 0, sizeof(mDistances));
|
||||
memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable));
|
||||
// NOTE: mCorrectionStates is an array of instances.
|
||||
// No need to initialize it explicitly here.
|
||||
}
|
||||
|
||||
// Non virtual inline destructor -- never inherit this class
|
||||
~Correction() {}
|
||||
void resetCorrection();
|
||||
void initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth);
|
||||
void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
|
||||
|
||||
// TODO: remove
|
||||
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
|
||||
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
|
||||
const bool doAutoCompletion, const int maxErrors);
|
||||
void checkState() const;
|
||||
bool sameAsTyped() const;
|
||||
bool initProcessState(const int index);
|
||||
|
||||
int getInputIndex() const;
|
||||
|
||||
bool needsToPrune() const;
|
||||
|
||||
int pushAndGetTotalTraverseCount() {
|
||||
return ++mTotalTraverseCount;
|
||||
}
|
||||
|
||||
int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||
const int wordCount, const bool isSpaceProximity, const int *word) const;
|
||||
int getFinalProbability(const int probability, int **word, int *wordLength);
|
||||
int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
|
||||
const int inputSize);
|
||||
|
||||
CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
|
||||
|
||||
/////////////////////////
|
||||
// Tree helper methods
|
||||
int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
|
||||
|
||||
inline int getTreeSiblingPos(const int index) const {
|
||||
return mCorrectionStates[index].mSiblingPos;
|
||||
}
|
||||
|
||||
inline void setTreeSiblingPos(const int index, const int pos) {
|
||||
mCorrectionStates[index].mSiblingPos = pos;
|
||||
}
|
||||
|
||||
inline int getTreeParentIndex(const int index) const {
|
||||
return mCorrectionStates[index].mParentIndex;
|
||||
}
|
||||
|
||||
class RankingAlgorithm {
|
||||
public:
|
||||
static int calculateFinalProbability(const int inputIndex, const int depth,
|
||||
const int probability, int *editDistanceTable, const Correction *correction,
|
||||
const int inputSize);
|
||||
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||
const int wordCount, const Correction *correction, const bool isSpaceProximity,
|
||||
const int *word);
|
||||
static float calcNormalizedScore(const int *before, const int beforeLength,
|
||||
const int *after, const int afterLength, const int score);
|
||||
static int editDistance(const int *before, const int beforeLength, const int *after,
|
||||
const int afterLength);
|
||||
private:
|
||||
static const int MAX_INITIAL_SCORE = 255;
|
||||
};
|
||||
|
||||
// proximity info state
|
||||
void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
|
||||
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
|
||||
mProximityInfoState.initInputParams(0, static_cast<float>(MAX_VALUE_FOR_WEIGHTING),
|
||||
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
|
||||
}
|
||||
|
||||
const int *getPrimaryInputWord() const {
|
||||
return mProximityInfoState.getPrimaryInputWord();
|
||||
}
|
||||
|
||||
int getPrimaryCodePointAt(const int index) const {
|
||||
return mProximityInfoState.getPrimaryCodePointAt(index);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Correction);
|
||||
|
||||
// The following "rate"s are used as a multiplier before dividing by 100, so they are in
|
||||
// percent.
|
||||
static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X;
|
||||
static const int WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE;
|
||||
static const int FULL_MATCHED_WORDS_PROMOTION_RATE;
|
||||
static const int WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE;
|
||||
static const int WORDS_WITH_MATCH_SKIP_PROMOTION_RATE;
|
||||
static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE;
|
||||
static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER;
|
||||
static const int CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE;
|
||||
static const int INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE;
|
||||
static const int FIRST_CHAR_DIFFERENT_DEMOTION_RATE;
|
||||
static const int TWO_WORDS_CAPITALIZED_DEMOTION_RATE;
|
||||
static const int TWO_WORDS_CORRECTION_DEMOTION_BASE;
|
||||
|
||||
/////////////////////////
|
||||
// static inline utils //
|
||||
/////////////////////////
|
||||
// Values at or above this threshold are capped instead of being multiplied by 255.
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;

// Returns 255 * num while num is below TWO_31ST_DIV_255, and S_INT_MAX otherwise.
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
    if (num < TWO_31ST_DIV_255) {
        return 255 * num;
    }
    return S_INT_MAX;
}
|
||||
|
||||
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
|
||||
AK_FORCE_INLINE static void multiplyIntCapped(const int multiplier, int *base) {
|
||||
const int temp = *base;
|
||||
if (temp != S_INT_MAX) {
|
||||
// Branch if multiplier == 2 for the optimization
|
||||
if (multiplier < 0) {
|
||||
if (DEBUG_DICT) {
|
||||
ASSERT(false);
|
||||
}
|
||||
AKLOGI("--- Invalid multiplier: %d", multiplier);
|
||||
} else if (multiplier == 0) {
|
||||
*base = 0;
|
||||
} else if (multiplier == 2) {
|
||||
*base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
|
||||
} else {
|
||||
// TODO: This overflow check gives a wrong answer when, for example,
|
||||
// temp = 2^16 + 1 and multiplier = 2^17 + 1.
|
||||
// Fix this behavior.
|
||||
const int tempRetval = temp * multiplier;
|
||||
*base = tempRetval >= temp ? tempRetval : S_INT_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Raises base to the n-th power using multiplyIntCapped for every step.
// Returns 1 for n <= 0. For base == 2 the result is computed with a shift and
// is S_INT_MAX once n reaches 31.
AK_FORCE_INLINE static int powerIntCapped(const int base, const int n) {
    if (n <= 0) {
        return 1;
    }
    if (base == 2) {
        if (n >= 31) {
            return S_INT_MAX;
        }
        return 1 << n;
    }
    int result = base;
    // base is already counted once, so n - 1 further multiplications remain.
    int remaining = n - 1;
    while (remaining > 0) {
        multiplyIntCapped(base, &result);
        --remaining;
    }
    return result;
}
|
||||
|
||||
// Scales *freq by rate / 100 in place. A value of S_INT_MAX is left untouched.
// Values above 1000000 are divided by 100 before the capped multiplication;
// smaller values are multiplied first and divided afterwards.
AK_FORCE_INLINE static void multiplyRate(const int rate, int *freq) {
    if (*freq == S_INT_MAX) {
        return;
    }
    const bool divideFirst = *freq > 1000000;
    if (divideFirst) {
        *freq /= 100;
    }
    multiplyIntCapped(rate, freq);
    if (!divideFirst) {
        *freq /= 100;
    }
}
|
||||
|
||||
inline int getSpaceProximityPos() const {
|
||||
return mSpaceProximityPos;
|
||||
}
|
||||
inline int getMissingSpacePos() const {
|
||||
return mMissingSpacePos;
|
||||
}
|
||||
|
||||
inline int getSkipPos() const {
|
||||
return mSkipPos;
|
||||
}
|
||||
|
||||
inline int getExcessivePos() const {
|
||||
return mExcessivePos;
|
||||
}
|
||||
|
||||
inline int getTransposedPos() const {
|
||||
return mTransposedPos;
|
||||
}
|
||||
|
||||
inline void incrementInputIndex();
|
||||
inline void incrementOutputIndex();
|
||||
inline void startToTraverseAllNodes();
|
||||
inline bool isSingleQuote(const int c);
|
||||
inline CorrectionType processSkipChar(const int c, const bool isTerminal,
|
||||
const bool inputIndexIncremented);
|
||||
inline CorrectionType processUnrelatedCorrectionType();
|
||||
inline void addCharToCurrentWord(const int c);
|
||||
inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
|
||||
const int inputSize);
|
||||
|
||||
static const int TYPED_LETTER_MULTIPLIER = 2;
|
||||
static const int FULL_WORD_MULTIPLIER = 2;
|
||||
const ProximityInfo *mProximityInfo;
|
||||
|
||||
bool mUseFullEditDistance;
|
||||
bool mDoAutoCompletion;
|
||||
int mMaxEditDistance;
|
||||
int mMaxDepth;
|
||||
int mInputSize;
|
||||
int mSpaceProximityPos;
|
||||
int mMissingSpacePos;
|
||||
int mTerminalInputIndex;
|
||||
int mTerminalOutputIndex;
|
||||
int mMaxErrors;
|
||||
|
||||
int mTotalTraverseCount;
|
||||
|
||||
// The following arrays are state buffer.
|
||||
int mWord[MAX_WORD_LENGTH];
|
||||
int mDistances[MAX_WORD_LENGTH];
|
||||
|
||||
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
|
||||
// Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
|
||||
int mEditDistanceTable[(MAX_WORD_LENGTH + 1) * (MAX_WORD_LENGTH + 1)];
|
||||
|
||||
CorrectionState mCorrectionStates[MAX_WORD_LENGTH];
|
||||
|
||||
// The following member variables are being used as cache values of the correction state.
|
||||
bool mNeedsToTraverseAllNodes;
|
||||
int mOutputIndex;
|
||||
int mInputIndex;
|
||||
|
||||
int mEquivalentCharCount;
|
||||
int mProximityCount;
|
||||
int mExcessiveCount;
|
||||
int mTransposedCount;
|
||||
int mSkippedCount;
|
||||
|
||||
int mTransposedPos;
|
||||
int mExcessivePos;
|
||||
int mSkipPos;
|
||||
|
||||
bool mLastCharExceeded;
|
||||
|
||||
bool mMatching;
|
||||
bool mProximityMatching;
|
||||
bool mAdditionalProximityMatching;
|
||||
bool mExceeding;
|
||||
bool mTransposing;
|
||||
bool mSkipping;
|
||||
ProximityInfoState mProximityInfoState;
|
||||
};
|
||||
|
||||
// Advances the cached input index by one.
inline void Correction::incrementInputIndex() {
    mInputIndex += 1;
}
|
||||
|
||||
// Advances the output index and fills the correction state at the new index:
// the tree fields are copied from the previous state, everything else is
// taken from the cached member values of this object.
AK_FORCE_INLINE void Correction::incrementOutputIndex() {
    ++mOutputIndex;
    const CorrectionState &previous = mCorrectionStates[mOutputIndex - 1];
    CorrectionState &current = mCorrectionStates[mOutputIndex];

    // Tree position: inherited from the previous output position.
    current.mParentIndex = previous.mParentIndex;
    current.mChildCount = previous.mChildCount;
    current.mSiblingPos = previous.mSiblingPos;

    // Traversal progress.
    current.mInputIndex = mInputIndex;
    current.mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;

    // Counters.
    current.mEquivalentCharCount = mEquivalentCharCount;
    current.mProximityCount = mProximityCount;
    current.mTransposedCount = mTransposedCount;
    current.mExcessiveCount = mExcessiveCount;
    current.mSkippedCount = mSkippedCount;

    // Positions.
    current.mSkipPos = mSkipPos;
    current.mTransposedPos = mTransposedPos;
    current.mExcessivePos = mExcessivePos;

    // Flags.
    current.mLastCharExceeded = mLastCharExceeded;
    current.mMatching = mMatching;
    current.mProximityMatching = mProximityMatching;
    current.mAdditionalProximityMatching = mAdditionalProximityMatching;
    current.mTransposing = mTransposing;
    current.mExceeding = mExceeding;
    current.mSkipping = mSkipping;
}
|
||||
|
||||
// Sets the cached flag that requests traversal of all nodes.
inline void Correction::startToTraverseAllNodes() { mNeedsToTraverseAllNodes = true; }
|
||||
|
||||
// True when c is a single quote while the primary code point typed at the
// current input index is not.
AK_FORCE_INLINE bool Correction::isSingleQuote(const int c) {
    const int typedCodePoint = mProximityInfoState.getPrimaryCodePointAt(mInputIndex);
    if (c != KEYCODE_SINGLE_QUOTE) {
        return false;
    }
    return typedCodePoint != KEYCODE_SINGLE_QUOTE;
}
|
||||
|
||||
// Appends c to the current word, records the terminal input/output indices
// and advances the output index. The terminal input index is one less than
// the current input index when inputIndexIncremented is set.
AK_FORCE_INLINE Correction::CorrectionType Correction::processSkipChar(const int c,
        const bool isTerminal, const bool inputIndexIncremented) {
    addCharToCurrentWord(c);
    const int inputIndexAdjustment = inputIndexIncremented ? 1 : 0;
    mTerminalInputIndex = mInputIndex - inputIndexAdjustment;
    mTerminalOutputIndex = mOutputIndex;
    incrementOutputIndex();
    const bool traversingAllOnTerminal = mNeedsToTraverseAllNodes && isTerminal;
    return traversingAllOnTerminal ? TRAVERSE_ALL_ON_TERMINAL : TRAVERSE_ALL_NOT_ON_TERMINAL;
}
|
||||
|
||||
// Reports UNRELATED. The terminal indices have to be recorded before any
// CorrectionType is returned, so they are set from the current indices first.
inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    mTerminalOutputIndex = mOutputIndex;
    mTerminalInputIndex = mInputIndex;
    return UNRELATED;
}
|
||||
|
||||
// Computes one more row of the edit distance table.
//
// The table is addressed as dp[i][j] = editDistanceTable[i * (inputSize + 1) + j].
// Rows dp[0] .. dp[outputLength - 1] are expected to be filled in already; this
// call fills dp[outputLength][0 .. inputSize]. Code points are compared after
// CharUtils::toBaseLowerCase(), and a swap of two adjacent characters is
// counted as a single edit.
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH] is not touched.
AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
        const int inputSize, const int *output, const int outputLength) {
    const int rowStride = inputSize + 1;
    const bool hasTwoOutputChars = outputLength >= 2;

    int *const currentRow = editDistanceTable + outputLength * rowStride;
    const int *const previousRow = editDistanceTable + (outputLength - 1) * rowStride;
    const int *const rowBeforePrevious =
            hasTwoOutputChars ? editDistanceTable + (outputLength - 2) * rowStride : 0;

    currentRow[0] = outputLength;
    const int lastOutput = CharUtils::toBaseLowerCase(output[outputLength - 1]);
    const int secondLastOutput =
            hasTwoOutputChars ? CharUtils::toBaseLowerCase(output[outputLength - 2]) : 0;

    for (int col = 1; col <= inputSize; ++col) {
        const int currentInput = CharUtils::toBaseLowerCase(input[col - 1]);
        const int substitutionCost = (currentInput == lastOutput) ? 0 : 1;
        int best = min(currentRow[col - 1] + 1,
                min(previousRow[col] + 1, previousRow[col - 1] + substitutionCost));
        // Transposition: the last two output characters equal the last two
        // input characters in swapped order.
        if (col >= 2 && rowBeforePrevious && currentInput == secondLastOutput
                && lastOutput == CharUtils::toBaseLowerCase(input[col - 2])) {
            best = min(best, rowBeforePrevious[col - 2] + 1);
        }
        currentRow[col] = best;
    }
}
|
||||
|
||||
// Stores c at the current output index and extends the edit distance table by
// the row for the word that now ends with c.
AK_FORCE_INLINE void Correction::addCharToCurrentWord(const int c) {
    mWord[mOutputIndex] = c;
    const int outputLength = mOutputIndex + 1;
    calcEditDistanceOneStep(mEditDistanceTable, mProximityInfoState.getPrimaryInputWord(),
            mInputSize, mWord, outputLength);
}
|
||||
|
||||
inline int Correction::getFinalProbabilityInternal(const int probability, int **word,
|
||||
int *wordLength, const int inputSize) {
|
||||
const int outputIndex = mTerminalOutputIndex;
|
||||
const int inputIndex = mTerminalInputIndex;
|
||||
*wordLength = outputIndex + 1;
|
||||
*word = mWord;
|
||||
int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
|
||||
inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
|
||||
return finalProbability;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_CORRECTION_H
|
|
@ -1,83 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2011 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_CORRECTION_STATE_H
|
||||
#define LATINIME_CORRECTION_STATE_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Plain record holding the correction bookkeeping for one position.
// initCorrectionState() below assigns every field of this struct.
// Counters and the input index are unsigned 8-bit values; the three *Pos
// fields are signed 8-bit values because -1 is stored in them by
// initCorrectionState().
struct CorrectionState {
|
||||
// Tree fields.
int mParentIndex;
|
||||
int mSiblingPos;
|
||||
uint16_t mChildCount;
|
||||
uint8_t mInputIndex;
|
||||
|
||||
// Counters, all reset to 0 by initCorrectionState().
uint8_t mEquivalentCharCount;
|
||||
uint8_t mProximityCount;
|
||||
uint8_t mTransposedCount;
|
||||
uint8_t mExcessiveCount;
|
||||
uint8_t mSkippedCount;
|
||||
|
||||
// Positions, all reset to -1 by initCorrectionState().
int8_t mTransposedPos;
|
||||
int8_t mExcessivePos;
|
||||
int8_t mSkipPos; // should be signed
|
||||
|
||||
// TODO: int?
|
||||
bool mLastCharExceeded;
|
||||
|
||||
// Flags, all reset to false by initCorrectionState().
bool mMatching;
|
||||
bool mTransposing;
|
||||
bool mExceeding;
|
||||
bool mSkipping;
|
||||
bool mProximityMatching;
|
||||
bool mAdditionalProximityMatching;
|
||||
|
||||
// Set from the traverseAll argument of initCorrectionState().
bool mNeedsToTraverseAllNodes;
|
||||
};
|
||||
|
||||
// Resets *state to its starting values: no parent (-1), sibling position at
// rootPos, the given child count and traverse-all flag, all positions at -1,
// all counters at 0 and all flags false.
inline static void initCorrectionState(CorrectionState *state, const int rootPos,
        const uint16_t childCount, const bool traverseAll) {
    // Tree fields and caller-supplied values.
    state->mParentIndex = -1;
    state->mSiblingPos = rootPos;
    state->mChildCount = childCount;
    state->mNeedsToTraverseAllNodes = traverseAll;
    state->mInputIndex = 0;

    // Positions: -1 is the starting value.
    state->mSkipPos = -1;
    state->mExcessivePos = -1;
    state->mTransposedPos = -1;

    // Counters.
    state->mEquivalentCharCount = 0;
    state->mProximityCount = 0;
    state->mSkippedCount = 0;
    state->mExcessiveCount = 0;
    state->mTransposedCount = 0;

    // Flags.
    state->mLastCharExceeded = false;
    state->mMatching = false;
    state->mProximityMatching = false;
    state->mAdditionalProximityMatching = false;
    state->mSkipping = false;
    state->mExceeding = false;
    state->mTransposing = false;
}
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_CORRECTION_STATE_H
|
|
@ -23,7 +23,6 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/binary_format.h"
|
||||
#include "suggest/core/dictionary/bloom_filter.h"
|
||||
#include "suggest/core/dictionary/dictionary.h"
|
||||
#include "suggest/core/dictionary/probability_utils.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
@ -170,30 +169,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
|||
return pos;
|
||||
}
|
||||
|
||||
void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord,
|
||||
const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const {
|
||||
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
|
||||
const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
|
||||
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (0 == pos) {
|
||||
// If no bigrams for this exact string, search again in lower case.
|
||||
pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
||||
true /* forceLowerCaseSearch */);
|
||||
}
|
||||
if (0 == pos) return;
|
||||
|
||||
uint8_t bigramFlags;
|
||||
do {
|
||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
const int probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
|
||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||
&pos);
|
||||
(*map)[bigramPos] = probability;
|
||||
setInFilter(filter, bigramPos);
|
||||
} while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
}
|
||||
|
||||
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
|
||||
// Checks whether this word starts with same character or neighboring characters of
|
||||
// what user typed.
|
||||
|
|
|
@ -17,9 +17,6 @@
|
|||
#ifndef LATINIME_BIGRAM_DICTIONARY_H
|
||||
#define LATINIME_BIGRAM_DICTIONARY_H
|
||||
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -32,10 +29,9 @@ class BigramDictionary {
|
|||
|
||||
int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
|
||||
int *frequencies, int *outputTypes) const;
|
||||
void fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, const int prevWordLength,
|
||||
std::map<int, int> *map, uint8_t *filter) const;
|
||||
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
|
||||
~BigramDictionary();
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "suggest/core/dictionary/bloom_filter.h"
|
||||
#include "suggest/core/dictionary/probability_utils.h"
|
||||
#include "utils/char_utils.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#ifndef LATINIME_PROBABILITY_UTILS_H
|
||||
#define LATINIME_PROBABILITY_UTILS_H
|
||||
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -49,24 +48,6 @@ class ProbabilityUtils {
|
|||
+ static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize);
|
||||
}
|
||||
|
||||
// This returns a probability in log space.
|
||||
static AK_FORCE_INLINE int getProbability(const int position,
|
||||
const std::map<int, int> *const bigramMap,
|
||||
const uint8_t *bigramFilter, const int unigramProbability) {
|
||||
if (!bigramMap || !bigramFilter) {
|
||||
return backoff(unigramProbability);
|
||||
}
|
||||
if (!isInFilter(bigramFilter, position)){
|
||||
return backoff(unigramProbability);
|
||||
}
|
||||
const std::map<int, int>::const_iterator bigramProbabilityIt = bigramMap->find(position);
|
||||
if (bigramProbabilityIt != bigramMap->end()) {
|
||||
const int bigramProbability = bigramProbabilityIt->second;
|
||||
return computeProbabilityForBigram(unigramProbability, bigramProbability);
|
||||
}
|
||||
return backoff(unigramProbability);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityUtils);
|
||||
};
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class Correction;
|
||||
|
||||
class ProximityInfo {
|
||||
public:
|
||||
ProximityInfo(JNIEnv *env, const jstring localeJStr,
|
||||
|
@ -41,7 +39,6 @@ class ProximityInfo {
|
|||
float getNormalizedSquaredDistanceFromCenterFloatG(
|
||||
const int keyId, const int x, const int y,
|
||||
const float verticalScale) const;
|
||||
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||
int getCodePointOf(const int keyIndex) const;
|
||||
bool hasSweetSpotData(const int keyIndex) const {
|
||||
// When there are no calibration data for a key,
|
||||
|
@ -95,8 +92,6 @@ class ProximityInfo {
|
|||
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
|
||||
|
||||
void initializeG();
|
||||
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
|
||||
bool hasInputCoordinates() const;
|
||||
|
||||
const int GRID_WIDTH;
|
||||
const int GRID_HEIGHT;
|
||||
|
|
|
@ -156,11 +156,6 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
if (!isGeometric && pointerId == 0) {
|
||||
ProximityInfoStateUtils::initPrimaryInputWord(
|
||||
inputSize, mInputProximities, mPrimaryInputWord);
|
||||
if (mTouchPositionCorrectionEnabled) {
|
||||
ProximityInfoStateUtils::initNormalizedSquaredDistances(
|
||||
mProximityInfo, inputSize, xCoordinates, yCoordinates, mInputProximities,
|
||||
&mSampledInputXs, &mSampledInputYs, mNormalizedSquaredDistances);
|
||||
}
|
||||
}
|
||||
if (DEBUG_GEO_FULL) {
|
||||
AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize);
|
||||
|
@ -279,26 +274,6 @@ float ProximityInfoState::getDirection(const int index0, const int index1) const
|
|||
&mSampledInputXs, &mSampledInputYs, index0, index1);
|
||||
}
|
||||
|
||||
float ProximityInfoState::getLineToKeyDistance(
|
||||
const int from, const int to, const int keyId, const bool extend) const {
|
||||
if (from < 0 || from > mSampledInputSize - 1) {
|
||||
return 0.0f;
|
||||
}
|
||||
if (to < 0 || to > mSampledInputSize - 1) {
|
||||
return 0.0f;
|
||||
}
|
||||
const int x0 = mSampledInputXs[from];
|
||||
const int y0 = mSampledInputYs[from];
|
||||
const int x1 = mSampledInputXs[to];
|
||||
const int y1 = mSampledInputYs[to];
|
||||
|
||||
const int keyX = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
|
||||
const int keyY = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
|
||||
|
||||
return ProximityInfoUtils::pointToLineSegSquaredDistanceFloat(
|
||||
keyX, keyY, x0, y0, x1, y1, extend);
|
||||
}
|
||||
|
||||
float ProximityInfoState::getMostProbableString(int *const codePointBuf) const {
|
||||
memcpy(codePointBuf, mMostProbableString, sizeof(mMostProbableString));
|
||||
return mMostProbableStringProbability;
|
||||
|
|
|
@ -53,7 +53,6 @@ class ProximityInfoState {
|
|||
mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false),
|
||||
mSampledInputSize(0), mMostProbableStringProbability(0.0f) {
|
||||
memset(mInputProximities, 0, sizeof(mInputProximities));
|
||||
memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances));
|
||||
memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
|
||||
memset(mMostProbableString, 0, sizeof(mMostProbableString));
|
||||
}
|
||||
|
@ -91,6 +90,19 @@ class ProximityInfoState {
|
|||
return false;
|
||||
}
|
||||
|
||||
// TODO: Promote insertion letter correction if that letter is a proximity of the previous
|
||||
// letter like follows:
|
||||
// // Demotion for a word with excessive character
|
||||
// if (excessiveCount > 0) {
|
||||
// multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||
// if (!lastCharExceeded
|
||||
// && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
|
||||
// // If an excessive character is not adjacent to the left char or the right char,
|
||||
// // we will demote this word.
|
||||
// multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE,
|
||||
// &finalFreq);
|
||||
// }
|
||||
// }
|
||||
inline bool existsAdjacentProximityChars(const int index) const {
|
||||
if (index < 0 || index >= mSampledInputSize) return false;
|
||||
const int currentCodePoint = getPrimaryCodePointAt(index);
|
||||
|
@ -106,12 +118,6 @@ class ProximityInfoState {
|
|||
return false;
|
||||
}
|
||||
|
||||
inline int getNormalizedSquaredDistance(
|
||||
const int inputIndex, const int proximityIndex) const {
|
||||
return mNormalizedSquaredDistances[
|
||||
inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
|
||||
}
|
||||
|
||||
inline const int *getPrimaryInputWord() const {
|
||||
return mPrimaryInputWord;
|
||||
}
|
||||
|
@ -190,24 +196,10 @@ class ProximityInfoState {
|
|||
|
||||
float getProbability(const int index, const int charCode) const;
|
||||
|
||||
float getLineToKeyDistance(
|
||||
const int from, const int to, const int keyId, const bool extend) const;
|
||||
|
||||
bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
|
||||
/////////////////////////////////////////
|
||||
// Defined in proximity_info_state.cpp //
|
||||
/////////////////////////////////////////
|
||||
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
|
||||
|
||||
float calculateSquaredDistanceFromSweetSpotCenter(
|
||||
const int keyIndex, const int inputIndex) const;
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Defined here //
|
||||
/////////////////////////////////////////
|
||||
|
||||
inline const int *getProximityCodePointsAt(const int index) const {
|
||||
return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index);
|
||||
|
@ -249,7 +241,6 @@ class ProximityInfoState {
|
|||
std::vector<std::vector<int> > mSampledSearchKeyVectors;
|
||||
bool mTouchPositionCorrectionEnabled;
|
||||
int mInputProximities[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];
|
||||
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];
|
||||
int mSampledInputSize;
|
||||
int mPrimaryInputWord[MAX_WORD_LENGTH];
|
||||
float mMostProbableStringProbability;
|
||||
|
|
|
@ -181,48 +181,6 @@ namespace latinime {
|
|||
return squaredDistance / squaredRadius;
|
||||
}
|
||||
|
||||
/* static */ void ProximityInfoStateUtils::initNormalizedSquaredDistances(
|
||||
const ProximityInfo *const proximityInfo, const int inputSize, const int *inputXCoordinates,
|
||||
const int *inputYCoordinates, const int *const inputProximities,
|
||||
const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs,
|
||||
int *normalizedSquaredDistances) {
|
||||
memset(normalizedSquaredDistances, NOT_A_DISTANCE,
|
||||
sizeof(normalizedSquaredDistances[0]) * MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH);
|
||||
const bool hasInputCoordinates = sampledInputXs->size() > 0 && sampledInputYs->size() > 0;
|
||||
for (int i = 0; i < inputSize; ++i) {
|
||||
const int *proximityCodePoints = getProximityCodePointsAt(inputProximities, i);
|
||||
const int primaryKey = proximityCodePoints[0];
|
||||
const int x = inputXCoordinates[i];
|
||||
const int y = inputYCoordinates[i];
|
||||
if (DEBUG_PROXIMITY_CHARS) {
|
||||
int a = x + y + primaryKey;
|
||||
a += 0;
|
||||
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
|
||||
}
|
||||
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityCodePoints[j] > 0; ++j) {
|
||||
const int currentCodePoint = proximityCodePoints[j];
|
||||
const float squaredDistance =
|
||||
hasInputCoordinates ? calculateNormalizedSquaredDistance(
|
||||
proximityInfo, sampledInputXs, sampledInputYs,
|
||||
proximityInfo->getKeyIndexOf(currentCodePoint), i) :
|
||||
ProximityInfoParams::NOT_A_DISTANCE_FLOAT;
|
||||
if (squaredDistance >= 0.0f) {
|
||||
normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
|
||||
static_cast<int>(squaredDistance
|
||||
* ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
|
||||
} else {
|
||||
normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
|
||||
(j == 0) ? MATCH_CHAR_WITHOUT_DISTANCE_INFO :
|
||||
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
|
||||
}
|
||||
if (DEBUG_PROXIMITY_CHARS) {
|
||||
AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* static */ void ProximityInfoStateUtils::initGeometricDistanceInfos(
|
||||
const ProximityInfo *const proximityInfo, const int sampledInputSize,
|
||||
const int lastSavedInputSize, const float verticalSweetSpotScale,
|
||||
|
|
|
@ -23,31 +23,6 @@
|
|||
namespace latinime {
|
||||
class TouchPositionCorrectionUtils {
|
||||
public:
|
||||
// TODO: (OLD) Remove
|
||||
static float getLengthScalingFactor(const float normalizedSquaredDistance) {
|
||||
// Promote or demote the score according to the distance from the sweet spot
|
||||
static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
|
||||
static const float B = 1.0f;
|
||||
static const float C = 0.5f;
|
||||
static const float MIN = 0.3f;
|
||||
static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
|
||||
static const float R2 = HALF_SCORE_SQUARED_RADIUS;
|
||||
const float x = normalizedSquaredDistance / static_cast<float>(
|
||||
ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
|
||||
const float factor = max((x < R1)
|
||||
? (A * (R1 - x) + B * x) / R1
|
||||
: (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
|
||||
// factor is a piecewise linear function like:
|
||||
// A -_ .
|
||||
// ^-_ .
|
||||
// B \ .
|
||||
// \_ .
|
||||
// C ------------.
|
||||
// .
|
||||
// 0 R1 R2 .
|
||||
return factor;
|
||||
}
|
||||
|
||||
static float getSweetSpotFactor(const bool isTouchPositionCorrectionEnabled,
|
||||
const float normalizedSquaredDistance) {
|
||||
// Promote or demote the score according to the distance from the sweet spot
|
||||
|
|
|
@ -62,6 +62,26 @@ class EditDistance {
|
|||
return dp[(beforeLength + 1) * (afterLength + 1) - 1];
|
||||
}
|
||||
|
||||
// Debug-only dump of the top-left 11x11 corner of an edit distance table.
//
// Does nothing unless DEBUG_DICT is set. The table is read as rows of
// (editDistanceTableWidth + 1) floats; rows 0..10 are logged one per line. Cells whose column
// exceeds editDistanceTableWidth or whose row exceeds outputLength are printed as -1.
AK_FORCE_INLINE static void dumpEditDistance10ForDebug(const float *const editDistanceTable,
        const int editDistanceTableWidth, const int outputLength) {
    if (!DEBUG_DICT) {
        return;
    }
    AKLOGI("EditDistanceTable");
    for (int row = 0; row <= 10; ++row) {
        float cells[11];
        for (int col = 0; col <= 10; ++col) {
            const bool inTable = col < editDistanceTableWidth + 1 && row < outputLength + 1;
            cells[col] = inTable
                    ? editDistanceTable[row * (editDistanceTableWidth + 1) + col]
                    : -1.0f;
        }
        AKLOGI("[ %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f ]",
                cells[0], cells[1], cells[2], cells[3], cells[4], cells[5], cells[6], cells[7],
                cells[8], cells[9], cells[10]);
        (void)cells; // To suppress compiler warning
    }
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
|
||||
};
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "utils/autocorrection_threshold_utils.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/utils/edit_distance.h"
|
||||
#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Parameters of the legacy score formula. In this file they are referenced only by the
// maxScore computation at the end of calcNormalizedScore().
const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255;
const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2;
const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2;
|
||||
|
||||
/* static */ int AutocorrectionThresholdUtils::editDistance(const int *before,
|
||||
const int beforeLength, const int *after, const int afterLength) {
|
||||
const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein(
|
||||
before, beforeLength, after, afterLength);
|
||||
return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein));
|
||||
}
|
||||
|
||||
// In dictionary.cpp, getSuggestion() method,
|
||||
// When USE_SUGGEST_INTERFACE_FOR_TYPING is true:
|
||||
//
|
||||
// // TODO: Revise the following logic thoroughly by referring to the logic
|
||||
// // marked as "Otherwise" below.
|
||||
// SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert
|
||||
// them to integers.
|
||||
// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE)
|
||||
// Undo the scaling here to recover the original score.
|
||||
// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE
|
||||
//
|
||||
// Otherwise: suggestion scores are computed using the below formula.
|
||||
// original score
|
||||
// := powf(mTypedLetterMultiplier (this is defined 2),
|
||||
// (the number of matched characters between typed word and suggested word))
|
||||
// * (individual word's score which defined in the unigram dictionary,
|
||||
// and this score is defined in range [0, 255].)
|
||||
// Then, the following processing is applied.
|
||||
// - If the dictionary word is matched up to the point of the user entry
|
||||
// (full match up to min(before.length(), after.length()))
|
||||
// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
|
||||
// - If the word is a true full match except for differences in accents or
|
||||
// capitalization, then treat it as if the score was 255.
|
||||
// - If before.length() == after.length()
|
||||
// => multiply by mFullWordMultiplier (this is defined as 2)
|
||||
// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
|
||||
// For historical reasons we ignore the 1.2 modifier (because the measure for a good
|
||||
// autocorrection threshold was done at a time when it didn't exist). This doesn't change
|
||||
// the result.
|
||||
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
|
||||
|
||||
/* static */ float AutocorrectionThresholdUtils::calcNormalizedScore(const int *before,
|
||||
const int beforeLength, const int *after, const int afterLength, const int score) {
|
||||
if (0 == beforeLength || 0 == afterLength) {
|
||||
return 0.0f;
|
||||
}
|
||||
const int distance = editDistance(before, beforeLength, after, afterLength);
|
||||
int spaceCount = 0;
|
||||
for (int i = 0; i < afterLength; ++i) {
|
||||
if (after[i] == KEYCODE_SPACE) {
|
||||
++spaceCount;
|
||||
}
|
||||
}
|
||||
|
||||
if (spaceCount == afterLength) {
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
// add a weight based on edit distance.
|
||||
// distance <= max(afterLength, beforeLength) == afterLength,
|
||||
// so, 0 <= distance / afterLength <= 1
|
||||
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
|
||||
|
||||
// TODO: Revise the following logic thoroughly by referring to...
|
||||
if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) {
|
||||
return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight;
|
||||
}
|
||||
// ...this logic.
|
||||
const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
|
||||
: static_cast<float>(MAX_INITIAL_SCORE)
|
||||
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
|
||||
static_cast<float>(min(beforeLength, afterLength - spaceCount)))
|
||||
* static_cast<float>(FULL_WORD_MULTIPLIER);
|
||||
|
||||
return (static_cast<float>(score) / maxScore) * weight;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
|
||||
#define LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class AutocorrectionThresholdUtils {
|
||||
public:
|
||||
static float calcNormalizedScore(const int *before, const int beforeLength,
|
||||
const int *after, const int afterLength, const int score);
|
||||
static int editDistance(const int *before, const int beforeLength, const int *after,
|
||||
const int afterLength);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(AutocorrectionThresholdUtils);
|
||||
|
||||
static const int MAX_INITIAL_SCORE;
|
||||
static const int TYPED_LETTER_MULTIPLIER;
|
||||
static const int FULL_WORD_MULTIPLIER;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
|
Loading…
Reference in New Issue