am 527c1283: Remove unused methods.

* commit '527c128309da708d0fdaf7928da833320d1754e9':
  Remove unused methods.
main
Keisuke Kuroynagi 2013-06-11 01:57:02 -07:00 committed by Android Git Automerger
commit 675507766a
17 changed files with 185 additions and 1662 deletions

View File

@ -47,8 +47,6 @@ LATIN_IME_JNI_SRC_FILES := \
LATIN_IME_CORE_SRC_FILES := \ LATIN_IME_CORE_SRC_FILES := \
suggest/core/suggest.cpp \ suggest/core/suggest.cpp \
$(addprefix obsolete/, \
correction.cpp) \
$(addprefix suggest/core/dicnode/, \ $(addprefix suggest/core/dicnode/, \
dic_node.cpp \ dic_node.cpp \
dic_node_utils.cpp \ dic_node_utils.cpp \
@ -76,7 +74,9 @@ LATIN_IME_CORE_SRC_FILES := \
typing_suggest_policy.cpp \ typing_suggest_policy.cpp \
typing_traversal.cpp \ typing_traversal.cpp \
typing_weighting.cpp) \ typing_weighting.cpp) \
utils/char_utils.cpp $(addprefix utils/, \
char_utils.cpp \
ranking_algorithm.cpp)
LOCAL_SRC_FILES := \ LOCAL_SRC_FILES := \
$(LATIN_IME_JNI_SRC_FILES) \ $(LATIN_IME_JNI_SRC_FILES) \

View File

@ -22,15 +22,16 @@
#include <cstring> // for memset() #include <cstring> // for memset()
#include <fcntl.h> #include <fcntl.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <unistd.h>
#include "defines.h" #include "defines.h"
#include "jni.h" #include "jni.h"
#include "jni_common.h" #include "jni_common.h"
#include "obsolete/correction.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h" #include "suggest/core/suggest_options.h"
#include "utils/autocorrection_threshold_utils.h"
namespace latinime { namespace latinime {
@ -201,7 +202,7 @@ static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass
int afterCodePoints[afterLength]; int afterCodePoints[afterLength];
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength, return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength,
afterCodePoints, afterLength, score); afterCodePoints, afterLength, score);
} }
@ -213,7 +214,7 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji
int afterCodePoints[afterLength]; int afterCodePoints[afterLength];
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength, return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength,
afterCodePoints, afterLength); afterCodePoints, afterLength);
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,398 +0,0 @@
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_CORRECTION_H
#define LATINIME_CORRECTION_H
#include <cstring> // for memset()
#include "defines.h"
#include "obsolete/correction_state.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "utils/char_utils.h"
namespace latinime {
class ProximityInfo;
/**
 * Working state for one correction traversal.
 *
 * Holds the candidate word being built (mWord), the edit-distance table computed against
 * the primary input word (mEditDistanceTable), a CorrectionState snapshot per output
 * position (mCorrectionStates) and cached copies of the current state's counters/flags.
 *
 * Only the inline members are defined in this header; all other methods are declared
 * here and defined elsewhere.
 */
class Correction {
 public:
    // Result codes produced by processCharAndCalcState() and its inline helpers.
    typedef enum {
        TRAVERSE_ALL_ON_TERMINAL,
        TRAVERSE_ALL_NOT_ON_TERMINAL,
        UNRELATED,
        ON_TERMINAL,
        NOT_ON_TERMINAL
    } CorrectionType;

    // Zero-initializes every scalar member and clears the word, distance and
    // edit-distance buffers.
    Correction()
            : mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false),
              mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0),
              mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0),
              mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0),
              mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0),
              mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0),
              mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false),
              mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false),
              mSkipping(false), mProximityInfoState() {
        memset(mWord, 0, sizeof(mWord));
        memset(mDistances, 0, sizeof(mDistances));
        memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable));
        // NOTE: mCorrectionStates is an array of instances.
        // No need to initialize it explicitly here.
    }

    // Non virtual inline destructor -- never inherit this class
    ~Correction() {}

    void resetCorrection();
    void initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth);
    void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);

    // TODO: remove
    void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
            const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
            const bool doAutoCompletion, const int maxErrors);
    void checkState() const;
    bool sameAsTyped() const;
    bool initProcessState(const int index);

    int getInputIndex() const;

    bool needsToPrune() const;

    // Increments the traverse counter and returns the new (post-increment) value.
    int pushAndGetTotalTraverseCount() {
        return ++mTotalTraverseCount;
    }

    int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
            const int wordCount, const bool isSpaceProximity, const int *word) const;
    int getFinalProbability(const int probability, int **word, int *wordLength);
    int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
            const int inputSize);

    CorrectionType processCharAndCalcState(const int c, const bool isTerminal);

    /////////////////////////
    // Tree helper methods
    int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);

    // Accessors for the per-position CorrectionState entries. No bounds checking is
    // performed on index.
    inline int getTreeSiblingPos(const int index) const {
        return mCorrectionStates[index].mSiblingPos;
    }

    inline void setTreeSiblingPos(const int index, const int pos) {
        mCorrectionStates[index].mSiblingPos = pos;
    }

    inline int getTreeParentIndex(const int index) const {
        return mCorrectionStates[index].mParentIndex;
    }

    // Static scoring helpers; declared here, defined elsewhere.
    class RankingAlgorithm {
     public:
        static int calculateFinalProbability(const int inputIndex, const int depth,
                const int probability, int *editDistanceTable, const Correction *correction,
                const int inputSize);
        static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
                const int wordCount, const Correction *correction, const bool isSpaceProximity,
                const int *word);
        static float calcNormalizedScore(const int *before, const int beforeLength,
                const int *after, const int afterLength, const int score);
        static int editDistance(const int *before, const int beforeLength, const int *after,
                const int afterLength);
     private:
        static const int MAX_INITIAL_SCORE = 255;
    };

    // proximity info state
    // Forwards the typed input to mProximityInfoState, using pointer id 0,
    // MAX_VALUE_FOR_WEIGHTING as the second argument and 0, 0, false for the trailing ones.
    void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
            const int inputSize, const int *xCoordinates, const int *yCoordinates) {
        mProximityInfoState.initInputParams(0, static_cast<float>(MAX_VALUE_FOR_WEIGHTING),
                proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
    }

    const int *getPrimaryInputWord() const {
        return mProximityInfoState.getPrimaryInputWord();
    }

    int getPrimaryCodePointAt(const int index) const {
        return mProximityInfoState.getPrimaryCodePointAt(index);
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(Correction);

    // The following "rate"s are used as a multiplier before dividing by 100, so they are in
    // percent.
    static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE;
    static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X;
    static const int WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE;
    static const int WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE;
    static const int WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE;
    static const int WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE;
    static const int WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE;
    static const int FULL_MATCHED_WORDS_PROMOTION_RATE;
    static const int WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE;
    static const int WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE;
    static const int WORDS_WITH_MATCH_SKIP_PROMOTION_RATE;
    static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE;
    static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER;
    static const int CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE;
    static const int INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE;
    static const int FIRST_CHAR_DIFFERENT_DEMOTION_RATE;
    static const int TWO_WORDS_CAPITALIZED_DEMOTION_RATE;
    static const int TWO_WORDS_CORRECTION_DEMOTION_BASE;

    /////////////////////////
    // static inline utils //
    /////////////////////////
    static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;

    // Returns 255 * num, or S_INT_MAX when num is not below S_INT_MAX / 255.
    static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
        return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
    }

    static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;

    // Multiplies *base by multiplier in place, saturating at S_INT_MAX.
    //  - *base == S_INT_MAX: left untouched (already saturated).
    //  - multiplier < 0: logged as invalid (asserts when DEBUG_DICT), *base left untouched.
    //  - multiplier == 0: *base becomes 0.
    //  - otherwise: *base becomes the product, or S_INT_MAX when the product does not fit
    //    in an int or is smaller than the original value.
    AK_FORCE_INLINE static void multiplyIntCapped(const int multiplier, int *base) {
        const int temp = *base;
        if (temp != S_INT_MAX) {
            if (multiplier < 0) {
                if (DEBUG_DICT) {
                    ASSERT(false);
                }
                AKLOGI("--- Invalid multiplier: %d", multiplier);
            } else if (multiplier == 0) {
                *base = 0;
            } else if (multiplier == 2) {
                // Branch if multiplier == 2 for the optimization
                *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
            } else {
                // Compute the product in a wider type: the former int multiplication could
                // overflow (undefined behavior) and its wrapped result could still compare
                // >= temp, e.g. temp = 2^16 + 1 and multiplier = 2^17 + 1.
                const long long product = static_cast<long long>(temp) * multiplier;
                *base = (product >= temp && product <= S_INT_MAX)
                        ? static_cast<int>(product) : S_INT_MAX;
            }
        }
    }

    // Returns base raised to the n-th power, saturating through multiplyIntCapped().
    // n <= 0 yields 1; base == 2 is computed with a shift and yields S_INT_MAX for n >= 31.
    AK_FORCE_INLINE static int powerIntCapped(const int base, const int n) {
        if (n <= 0) return 1;
        if (base == 2) {
            return n < 31 ? 1 << n : S_INT_MAX;
        }
        int ret = base;
        for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
        return ret;
    }

    // Applies a percentage rate to *freq in place (multiply by rate, divide by 100).
    // For values above 1000000 the division is done first; presumably this keeps the
    // intermediate product from saturating. An already saturated *freq is left untouched.
    AK_FORCE_INLINE static void multiplyRate(const int rate, int *freq) {
        if (*freq != S_INT_MAX) {
            if (*freq > 1000000) {
                *freq /= 100;
                multiplyIntCapped(rate, freq);
            } else {
                multiplyIntCapped(rate, freq);
                *freq /= 100;
            }
        }
    }

    inline int getSpaceProximityPos() const {
        return mSpaceProximityPos;
    }
    inline int getMissingSpacePos() const {
        return mMissingSpacePos;
    }

    inline int getSkipPos() const {
        return mSkipPos;
    }

    inline int getExcessivePos() const {
        return mExcessivePos;
    }

    inline int getTransposedPos() const {
        return mTransposedPos;
    }

    inline void incrementInputIndex();
    inline void incrementOutputIndex();
    inline void startToTraverseAllNodes();
    inline bool isSingleQuote(const int c);
    inline CorrectionType processSkipChar(const int c, const bool isTerminal,
            const bool inputIndexIncremented);
    inline CorrectionType processUnrelatedCorrectionType();
    inline void addCharToCurrentWord(const int c);
    inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
            const int inputSize);

    static const int TYPED_LETTER_MULTIPLIER = 2;
    static const int FULL_WORD_MULTIPLIER = 2;
    const ProximityInfo *mProximityInfo;

    bool mUseFullEditDistance;
    bool mDoAutoCompletion;
    int mMaxEditDistance;
    int mMaxDepth;
    int mInputSize;
    int mSpaceProximityPos;
    int mMissingSpacePos;
    int mTerminalInputIndex;
    int mTerminalOutputIndex;
    int mMaxErrors;

    int mTotalTraverseCount;

    // The following arrays are state buffer.
    int mWord[MAX_WORD_LENGTH];
    int mDistances[MAX_WORD_LENGTH];

    // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
    // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
    int mEditDistanceTable[(MAX_WORD_LENGTH + 1) * (MAX_WORD_LENGTH + 1)];

    CorrectionState mCorrectionStates[MAX_WORD_LENGTH];

    // The following member variables are being used as cache values of the correction state.
    bool mNeedsToTraverseAllNodes;
    int mOutputIndex;
    int mInputIndex;

    int mEquivalentCharCount;
    int mProximityCount;
    int mExcessiveCount;
    int mTransposedCount;
    int mSkippedCount;

    int mTransposedPos;
    int mExcessivePos;
    int mSkipPos;

    bool mLastCharExceeded;

    bool mMatching;
    bool mProximityMatching;
    bool mAdditionalProximityMatching;
    bool mExceeding;
    bool mTransposing;
    bool mSkipping;
    ProximityInfoState mProximityInfoState;
};
// Advances the cached input position by one.
inline void Correction::incrementInputIndex() {
    mInputIndex += 1;
}
// Advances mOutputIndex by one and fills the CorrectionState at the new position:
// the tree fields (parent index, child count, sibling position) are copied from the
// entry at the previous position, everything else from the cached member values.
// No bounds check is performed on the new index.
AK_FORCE_INLINE void Correction::incrementOutputIndex() {
    const CorrectionState &previous = mCorrectionStates[mOutputIndex];
    ++mOutputIndex;
    CorrectionState &current = mCorrectionStates[mOutputIndex];

    // Tree position is inherited from the previous entry.
    current.mParentIndex = previous.mParentIndex;
    current.mChildCount = previous.mChildCount;
    current.mSiblingPos = previous.mSiblingPos;

    // Snapshot of the cached traversal state.
    current.mInputIndex = mInputIndex;
    current.mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;

    current.mEquivalentCharCount = mEquivalentCharCount;
    current.mProximityCount = mProximityCount;
    current.mTransposedCount = mTransposedCount;
    current.mExcessiveCount = mExcessiveCount;
    current.mSkippedCount = mSkippedCount;

    current.mSkipPos = mSkipPos;
    current.mTransposedPos = mTransposedPos;
    current.mExcessivePos = mExcessivePos;

    current.mLastCharExceeded = mLastCharExceeded;

    current.mMatching = mMatching;
    current.mProximityMatching = mProximityMatching;
    current.mAdditionalProximityMatching = mAdditionalProximityMatching;
    current.mTransposing = mTransposing;
    current.mExceeding = mExceeding;
    current.mSkipping = mSkipping;
}
inline void Correction::startToTraverseAllNodes() {
mNeedsToTraverseAllNodes = true;
}
// Returns true when c is a single quote while the primary code point typed at the
// current input index is not one.
AK_FORCE_INLINE bool Correction::isSingleQuote(const int c) {
    const bool userTypedQuote =
            mProximityInfoState.getPrimaryCodePointAt(mInputIndex) == KEYCODE_SINGLE_QUOTE;
    if (userTypedQuote) {
        return false;
    }
    return c == KEYCODE_SINGLE_QUOTE;
}
// Appends c to the current word, records the terminal input/output indices and
// advances the output index. The terminal input index is the current input index,
// minus one when the caller already incremented it.
// Returns TRAVERSE_ALL_ON_TERMINAL only when traversing all nodes and isTerminal is set;
// TRAVERSE_ALL_NOT_ON_TERMINAL otherwise.
AK_FORCE_INLINE Correction::CorrectionType Correction::processSkipChar(const int c,
        const bool isTerminal, const bool inputIndexIncremented) {
    addCharToCurrentWord(c);

    const int alreadyConsumed = inputIndexIncremented ? 1 : 0;
    mTerminalInputIndex = mInputIndex - alreadyConsumed;
    mTerminalOutputIndex = mOutputIndex;
    incrementOutputIndex();

    const bool terminalWhileTraversingAll = mNeedsToTraverseAllNodes && isTerminal;
    return terminalWhileTraversingAll ? TRAVERSE_ALL_ON_TERMINAL : TRAVERSE_ALL_NOT_ON_TERMINAL;
}
// Records the current output/input indices as the terminal indices and reports UNRELATED.
inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    // The terminal indices must be set before any CorrectionType is returned.
    mTerminalOutputIndex = mOutputIndex;
    mTerminalInputIndex = mInputIndex;
    return UNRELATED;
}
// Fills one more row of the edit-distance table.
//
// With dp[i][j] = editDistanceTable[i * (inputSize + 1) + j], rows
// dp[0] ... dp[outputLength - 1] must already be calculated; this computes
// dp[outputLength][0] ... dp[outputLength][inputSize]. Characters are compared after
// CharUtils::toBaseLowerCase(). Besides insertion, deletion and substitution, a swap of
// two adjacent characters is counted as a single edit.
// outputLength is expected to be at least 1 (row outputLength - 1 and
// output[outputLength - 1] are read unconditionally).
AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
        const int inputSize, const int *output, const int outputLength) {
    // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH] is not touched.
    const int rowWidth = inputSize + 1;
    int *const row = editDistanceTable + outputLength * rowWidth;
    const int *const rowAbove = editDistanceTable + (outputLength - 1) * rowWidth;
    const int *const rowTwoAbove =
            outputLength >= 2 ? editDistanceTable + (outputLength - 2) * rowWidth : 0;

    row[0] = outputLength;

    const int lastOutput = CharUtils::toBaseLowerCase(output[outputLength - 1]);
    const int secondLastOutput =
            outputLength >= 2 ? CharUtils::toBaseLowerCase(output[outputLength - 2]) : 0;

    for (int col = 1; col <= inputSize; ++col) {
        const int typed = CharUtils::toBaseLowerCase(input[col - 1]);
        const int substitutionCost = (typed == lastOutput) ? 0 : 1;
        row[col] = min(row[col - 1] + 1,
                min(rowAbove[col] + 1, rowAbove[col - 1] + substitutionCost));
        // Transposition of the last two characters.
        if (col >= 2 && rowTwoAbove && typed == secondLastOutput
                && lastOutput == CharUtils::toBaseLowerCase(input[col - 2])) {
            row[col] = min(row[col], rowTwoAbove[col - 2] + 1);
        }
    }
}
// Stores c at the current output position of mWord and extends the edit-distance
// table by one row for the word of length mOutputIndex + 1.
AK_FORCE_INLINE void Correction::addCharToCurrentWord(const int c) {
    mWord[mOutputIndex] = c;
    const int wordLengthSoFar = mOutputIndex + 1;
    calcEditDistanceOneStep(mEditDistanceTable, mProximityInfoState.getPrimaryInputWord(),
            mInputSize, mWord, wordLengthSoFar);
}
inline int Correction::getFinalProbabilityInternal(const int probability, int **word,
int *wordLength, const int inputSize) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
*word = mWord;
int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
return finalProbability;
}
} // namespace latinime
#endif // LATINIME_CORRECTION_H

View File

@ -1,83 +0,0 @@
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_CORRECTION_STATE_H
#define LATINIME_CORRECTION_STATE_H
#include <stdint.h>
#include "defines.h"
namespace latinime {
// Plain record describing the correction state at one position of the traversal.
// Fields use narrow integer types (uint8_t / int8_t / uint16_t) for most members, so
// values stored here are truncated to those widths.
struct CorrectionState {
// Tree bookkeeping.
int mParentIndex;
int mSiblingPos;
uint16_t mChildCount;
// Input position and per-kind counters.
uint8_t mInputIndex;
uint8_t mEquivalentCharCount;
uint8_t mProximityCount;
uint8_t mTransposedCount;
uint8_t mExcessiveCount;
uint8_t mSkippedCount;
// Positions are signed because initCorrectionState() stores -1 in them.
int8_t mTransposedPos;
int8_t mExcessivePos;
int8_t mSkipPos; // should be signed
// TODO: int?
// Boolean status flags.
bool mLastCharExceeded;
bool mMatching;
bool mTransposing;
bool mExceeding;
bool mSkipping;
bool mProximityMatching;
bool mAdditionalProximityMatching;
bool mNeedsToTraverseAllNodes;
};
// Resets *state to the root configuration: no parent (-1), sibling position rootPos,
// the given child count and traverse-all flag, input index 0, all edit positions -1,
// all counters 0 and all status flags false.
inline static void initCorrectionState(CorrectionState *state, const int rootPos,
        const uint16_t childCount, const bool traverseAll) {
    // Tree bookkeeping.
    state->mParentIndex = -1;
    state->mSiblingPos = rootPos;
    state->mChildCount = childCount;
    state->mNeedsToTraverseAllNodes = traverseAll;

    // Nothing consumed and no edit recorded yet.
    state->mInputIndex = 0;
    state->mTransposedPos = state->mExcessivePos = state->mSkipPos = -1;

    state->mEquivalentCharCount = state->mProximityCount = 0;
    state->mTransposedCount = state->mExcessiveCount = state->mSkippedCount = 0;

    state->mLastCharExceeded = false;

    state->mMatching = state->mProximityMatching = state->mAdditionalProximityMatching = false;
    state->mTransposing = state->mExceeding = state->mSkipping = false;
}
} // namespace latinime
#endif // LATINIME_CORRECTION_STATE_H

View File

@ -23,7 +23,6 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/dictionary/probability_utils.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
@ -170,30 +169,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
return pos; return pos;
} }
void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord,
const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const {
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
false /* forceLowerCaseSearch */);
if (0 == pos) {
// If no bigrams for this exact string, search again in lower case.
pos = getBigramListPositionForWord(prevWord, prevWordLength,
true /* forceLowerCaseSearch */);
}
if (0 == pos) return;
uint8_t bigramFlags;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
const int probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
(*map)[bigramPos] = probability;
setInFilter(filter, bigramPos);
} while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
}
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
// Checks whether this word starts with same character or neighboring characters of // Checks whether this word starts with same character or neighboring characters of
// what user typed. // what user typed.

View File

@ -17,9 +17,6 @@
#ifndef LATINIME_BIGRAM_DICTIONARY_H #ifndef LATINIME_BIGRAM_DICTIONARY_H
#define LATINIME_BIGRAM_DICTIONARY_H #define LATINIME_BIGRAM_DICTIONARY_H
#include <map>
#include <stdint.h>
#include "defines.h" #include "defines.h"
namespace latinime { namespace latinime {
@ -32,10 +29,9 @@ class BigramDictionary {
int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
int *frequencies, int *outputTypes) const; int *frequencies, int *outputTypes) const;
void fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter) const;
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary(); ~BigramDictionary();
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);

View File

@ -19,7 +19,6 @@
#include <stdint.h> #include <stdint.h>
#include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/dictionary/probability_utils.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
#include "utils/hash_map_compat.h" #include "utils/hash_map_compat.h"

View File

@ -17,7 +17,6 @@
#ifndef LATINIME_PROBABILITY_UTILS_H #ifndef LATINIME_PROBABILITY_UTILS_H
#define LATINIME_PROBABILITY_UTILS_H #define LATINIME_PROBABILITY_UTILS_H
#include <map>
#include <stdint.h> #include <stdint.h>
#include "defines.h" #include "defines.h"
@ -49,24 +48,6 @@ class ProbabilityUtils {
+ static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize); + static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize);
} }
// This returns a probability in log space.
static AK_FORCE_INLINE int getProbability(const int position,
const std::map<int, int> *const bigramMap,
const uint8_t *bigramFilter, const int unigramProbability) {
if (!bigramMap || !bigramFilter) {
return backoff(unigramProbability);
}
if (!isInFilter(bigramFilter, position)){
return backoff(unigramProbability);
}
const std::map<int, int>::const_iterator bigramProbabilityIt = bigramMap->find(position);
if (bigramProbabilityIt != bigramMap->end()) {
const int bigramProbability = bigramProbabilityIt->second;
return computeProbabilityForBigram(unigramProbability, bigramProbability);
}
return backoff(unigramProbability);
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityUtils);
}; };

View File

@ -24,8 +24,6 @@
namespace latinime { namespace latinime {
class Correction;
class ProximityInfo { class ProximityInfo {
public: public:
ProximityInfo(JNIEnv *env, const jstring localeJStr, ProximityInfo(JNIEnv *env, const jstring localeJStr,
@ -41,7 +39,6 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG( float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const int keyId, const int x, const int y,
const float verticalScale) const; const float verticalScale) const;
bool sameAsTyped(const unsigned short *word, int length) const;
int getCodePointOf(const int keyIndex) const; int getCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const { bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key, // When there are no calibration data for a key,
@ -95,8 +92,6 @@ class ProximityInfo {
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
void initializeG(); void initializeG();
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
bool hasInputCoordinates() const;
const int GRID_WIDTH; const int GRID_WIDTH;
const int GRID_HEIGHT; const int GRID_HEIGHT;

View File

@ -156,11 +156,6 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
if (!isGeometric && pointerId == 0) { if (!isGeometric && pointerId == 0) {
ProximityInfoStateUtils::initPrimaryInputWord( ProximityInfoStateUtils::initPrimaryInputWord(
inputSize, mInputProximities, mPrimaryInputWord); inputSize, mInputProximities, mPrimaryInputWord);
if (mTouchPositionCorrectionEnabled) {
ProximityInfoStateUtils::initNormalizedSquaredDistances(
mProximityInfo, inputSize, xCoordinates, yCoordinates, mInputProximities,
&mSampledInputXs, &mSampledInputYs, mNormalizedSquaredDistances);
}
} }
if (DEBUG_GEO_FULL) { if (DEBUG_GEO_FULL) {
AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize); AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize);
@ -279,26 +274,6 @@ float ProximityInfoState::getDirection(const int index0, const int index1) const
&mSampledInputXs, &mSampledInputYs, index0, index1); &mSampledInputXs, &mSampledInputYs, index0, index1);
} }
float ProximityInfoState::getLineToKeyDistance(
const int from, const int to, const int keyId, const bool extend) const {
if (from < 0 || from > mSampledInputSize - 1) {
return 0.0f;
}
if (to < 0 || to > mSampledInputSize - 1) {
return 0.0f;
}
const int x0 = mSampledInputXs[from];
const int y0 = mSampledInputYs[from];
const int x1 = mSampledInputXs[to];
const int y1 = mSampledInputYs[to];
const int keyX = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
const int keyY = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
return ProximityInfoUtils::pointToLineSegSquaredDistanceFloat(
keyX, keyY, x0, y0, x1, y1, extend);
}
float ProximityInfoState::getMostProbableString(int *const codePointBuf) const { float ProximityInfoState::getMostProbableString(int *const codePointBuf) const {
memcpy(codePointBuf, mMostProbableString, sizeof(mMostProbableString)); memcpy(codePointBuf, mMostProbableString, sizeof(mMostProbableString));
return mMostProbableStringProbability; return mMostProbableStringProbability;

View File

@ -53,7 +53,6 @@ class ProximityInfoState {
mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false), mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false),
mSampledInputSize(0), mMostProbableStringProbability(0.0f) { mSampledInputSize(0), mMostProbableStringProbability(0.0f) {
memset(mInputProximities, 0, sizeof(mInputProximities)); memset(mInputProximities, 0, sizeof(mInputProximities));
memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances));
memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
memset(mMostProbableString, 0, sizeof(mMostProbableString)); memset(mMostProbableString, 0, sizeof(mMostProbableString));
} }
@ -91,6 +90,19 @@ class ProximityInfoState {
return false; return false;
} }
// TODO: Promote insertion letter correction if that letter is a proximity of the previous
// letter like follows:
// // Demotion for a word with excessive character
// if (excessiveCount > 0) {
// multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
// if (!lastCharExceeded
// && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
// // If an excessive character is not adjacent to the left char or the right char,
// // we will demote this word.
// multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE,
// &finalFreq);
// }
// }
inline bool existsAdjacentProximityChars(const int index) const { inline bool existsAdjacentProximityChars(const int index) const {
if (index < 0 || index >= mSampledInputSize) return false; if (index < 0 || index >= mSampledInputSize) return false;
const int currentCodePoint = getPrimaryCodePointAt(index); const int currentCodePoint = getPrimaryCodePointAt(index);
@ -106,12 +118,6 @@ class ProximityInfoState {
return false; return false;
} }
inline int getNormalizedSquaredDistance(
const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[
inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
}
inline const int *getPrimaryInputWord() const { inline const int *getPrimaryInputWord() const {
return mPrimaryInputWord; return mPrimaryInputWord;
} }
@ -190,24 +196,10 @@ class ProximityInfoState {
float getProbability(const int index, const int charCode) const; float getProbability(const int index, const int charCode) const;
float getLineToKeyDistance(
const int from, const int to, const int keyId, const bool extend) const;
bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const; bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const;
private: private:
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
/////////////////////////////////////////
// Defined in proximity_info_state.cpp //
/////////////////////////////////////////
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const;
/////////////////////////////////////////
// Defined here //
/////////////////////////////////////////
inline const int *getProximityCodePointsAt(const int index) const { inline const int *getProximityCodePointsAt(const int index) const {
return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index); return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index);
@ -249,7 +241,6 @@ class ProximityInfoState {
std::vector<std::vector<int> > mSampledSearchKeyVectors; std::vector<std::vector<int> > mSampledSearchKeyVectors;
bool mTouchPositionCorrectionEnabled; bool mTouchPositionCorrectionEnabled;
int mInputProximities[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH]; int mInputProximities[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];
int mSampledInputSize; int mSampledInputSize;
int mPrimaryInputWord[MAX_WORD_LENGTH]; int mPrimaryInputWord[MAX_WORD_LENGTH];
float mMostProbableStringProbability; float mMostProbableStringProbability;

View File

@ -181,48 +181,6 @@ namespace latinime {
return squaredDistance / squaredRadius; return squaredDistance / squaredRadius;
} }
// Fills normalizedSquaredDistances, a table of MAX_WORD_LENGTH rows by MAX_PROXIMITY_CHARS_SIZE
// columns, with one entry per (input position, proximity code point) pair.
//
// For each input position i and each proximity code point j listed for it:
//   - when sampled coordinates are available and the computed normalized squared distance is
//     non-negative, the entry is that distance multiplied by
//     ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR and truncated to int;
//   - otherwise the entry is MATCH_CHAR_WITHOUT_DISTANCE_INFO for the first code point (j == 0)
//     and PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO for the others.
// Every entry that is not visited keeps the value NOT_A_DISTANCE.
//
// inputXCoordinates / inputYCoordinates are only read for debug logging.
/* static */ void ProximityInfoStateUtils::initNormalizedSquaredDistances(
        const ProximityInfo *const proximityInfo, const int inputSize, const int *inputXCoordinates,
        const int *inputYCoordinates, const int *const inputProximities,
        const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs,
        int *normalizedSquaredDistances) {
    // Reset the table element by element. A byte-wise memset() only produces the intended int
    // value when all bytes of the sentinel are identical, so do not depend on that here.
    const int tableSize = MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH;
    for (int k = 0; k < tableSize; ++k) {
        normalizedSquaredDistances[k] = NOT_A_DISTANCE;
    }
    const bool hasInputCoordinates = sampledInputXs->size() > 0 && sampledInputYs->size() > 0;
    for (int i = 0; i < inputSize; ++i) {
        const int *const proximityCodePoints = getProximityCodePointsAt(inputProximities, i);
        if (DEBUG_PROXIMITY_CHARS) {
            const int primaryKey = proximityCodePoints[0];
            const int x = inputXCoordinates[i];
            const int y = inputYCoordinates[i];
            AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
            // To suppress compiler warnings when logging is compiled out.
            (void)primaryKey;
            (void)x;
            (void)y;
        }
        // The proximity list of a position ends at the first non-positive code point.
        for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityCodePoints[j] > 0; ++j) {
            const int currentCodePoint = proximityCodePoints[j];
            const int tableIndex = i * MAX_PROXIMITY_CHARS_SIZE + j;
            const float squaredDistance =
                    hasInputCoordinates ? calculateNormalizedSquaredDistance(
                            proximityInfo, sampledInputXs, sampledInputYs,
                            proximityInfo->getKeyIndexOf(currentCodePoint), i) :
                            ProximityInfoParams::NOT_A_DISTANCE_FLOAT;
            if (squaredDistance >= 0.0f) {
                normalizedSquaredDistances[tableIndex] =
                        static_cast<int>(squaredDistance
                                * ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
            } else {
                // No usable distance: remember only whether this is the primary code point.
                normalizedSquaredDistances[tableIndex] =
                        (j == 0) ? MATCH_CHAR_WITHOUT_DISTANCE_INFO :
                                PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
            }
            if (DEBUG_PROXIMITY_CHARS) {
                AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
            }
        }
    }
}
/* static */ void ProximityInfoStateUtils::initGeometricDistanceInfos( /* static */ void ProximityInfoStateUtils::initGeometricDistanceInfos(
const ProximityInfo *const proximityInfo, const int sampledInputSize, const ProximityInfo *const proximityInfo, const int sampledInputSize,
const int lastSavedInputSize, const float verticalSweetSpotScale, const int lastSavedInputSize, const float verticalSweetSpotScale,

View File

@ -23,31 +23,6 @@
namespace latinime { namespace latinime {
class TouchPositionCorrectionUtils { class TouchPositionCorrectionUtils {
public: public:
// TODO: (OLD) Remove
// Returns a multiplier that promotes or demotes a score according to how far the touch was
// from the sweet spot. With x = normalizedSquaredDistance divided by
// ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR, the multiplier is piecewise
// linear in x:
//   - for x <  NEUTRAL_SCORE_SQUARED_RADIUS: the line from (0, ZERO_DISTANCE_PROMOTION_RATE / 100)
//     to (NEUTRAL_SCORE_SQUARED_RADIUS, 1.0);
//   - for x >= NEUTRAL_SCORE_SQUARED_RADIUS: the line through (NEUTRAL_SCORE_SQUARED_RADIUS, 1.0)
//     and (HALF_SCORE_SQUARED_RADIUS, 0.5);
// and the result is never smaller than 0.3.
static float getLengthScalingFactor(const float normalizedSquaredDistance) {
    static const float zeroDistanceFactor = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
    static const float neutralFactor = 1.0f;
    static const float halfFactor = 0.5f;
    static const float minFactor = 0.3f;
    static const float neutralRadius = NEUTRAL_SCORE_SQUARED_RADIUS;
    static const float halfRadius = HALF_SCORE_SQUARED_RADIUS;
    const float scaledDistance = normalizedSquaredDistance / static_cast<float>(
            ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
    float linearFactor;
    if (scaledDistance < neutralRadius) {
        // Inside the neutral radius: interpolate from the promoted factor down to neutral.
        linearFactor = (zeroDistanceFactor * (neutralRadius - scaledDistance)
                + neutralFactor * scaledDistance) / neutralRadius;
    } else {
        // Beyond the neutral radius: keep decreasing along the neutral-to-half line.
        linearFactor = (neutralFactor * (halfRadius - scaledDistance)
                + halfFactor * (scaledDistance - neutralRadius)) / (halfRadius - neutralRadius);
    }
    // Clamp so that far-away touches are never demoted below the minimum factor.
    return max(linearFactor, minFactor);
}
static float getSweetSpotFactor(const bool isTouchPositionCorrectionEnabled, static float getSweetSpotFactor(const bool isTouchPositionCorrectionEnabled,
const float normalizedSquaredDistance) { const float normalizedSquaredDistance) {
// Promote or demote the score according to the distance from the sweet spot // Promote or demote the score according to the distance from the sweet spot

View File

@ -62,6 +62,26 @@ class EditDistance {
return dp[(beforeLength + 1) * (afterLength + 1) - 1]; return dp[(beforeLength + 1) * (afterLength + 1) - 1];
} }
// Debug helper: when DEBUG_DICT is enabled, logs the top-left 11 x 11 corner of an edit distance
// table, one row per log line. The table is read as rows of (editDistanceTableWidth + 1) floats;
// cells outside (outputLength + 1) rows or (editDistanceTableWidth + 1) columns are printed
// as -1.
AK_FORCE_INLINE static void dumpEditDistance10ForDebug(const float *const editDistanceTable,
        const int editDistanceTableWidth, const int outputLength) {
    if (!DEBUG_DICT) {
        return;
    }
    AKLOGI("EditDistanceTable");
    for (int row = 0; row <= 10; ++row) {
        float cells[11];
        for (int col = 0; col <= 10; ++col) {
            const bool insideTable =
                    col < editDistanceTableWidth + 1 && row < outputLength + 1;
            if (!insideTable) {
                cells[col] = -1.0f;
                continue;
            }
            const float *const rowStart = editDistanceTable + row * (editDistanceTableWidth + 1);
            cells[col] = rowStart[col];
        }
        AKLOGI("[ %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f ]",
                cells[0], cells[1], cells[2], cells[3], cells[4], cells[5], cells[6], cells[7],
                cells[8], cells[9], cells[10]);
        (void)cells; // To suppress compiler warning
    }
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance); DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
}; };

View File

@ -0,0 +1,105 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils/autocorrection_threshold_utils.h"
#include <cmath>
#include "defines.h"
#include "suggest/policyimpl/utils/edit_distance.h"
#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h"
namespace latinime {
// Constants of the legacy (non suggest-interface) score formula. In this file they are only
// referenced by the currently unreachable tail of calcNormalizedScore().
// Upper bound of an individual word's unigram score (scores are in the range [0, 255]).
const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255;
// Base that the legacy formula raises to the number of matched characters.
const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2;
// Extra factor the legacy formula includes in the maximum possible score.
const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2;
/* static */ int AutocorrectionThresholdUtils::editDistance(const int *before,
const int beforeLength, const int *after, const int afterLength) {
const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein(
before, beforeLength, after, afterLength);
return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein));
}
// In dictionary.cpp, getSuggestion() method,
// When USE_SUGGEST_INTERFACE_FOR_TYPING is true:
//
// // TODO: Revise the following logic thoroughly by referring to the logic
// // marked as "Otherwise" below.
// The original suggestion scores were multiplied by SUGGEST_INTERFACE_OUTPUT_SCALE to convert
// them to integers.
// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE)
// Undo the scaling here to recover the original score.
// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE
//
// Otherwise: suggestion scores are computed using the below formula.
// original score
// := powf(mTypedLetterMultiplier (this is defined 2),
// (the number of matched characters between typed word and suggested word))
// * (individual word's score, which is defined in the unigram dictionary,
// and this score is defined in the range [0, 255].)
// Then, the following processing is applied.
// - If the dictionary word is matched up to the point of the user entry
// (full match up to min(before.length(), after.length()))
// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (which is defined as 1.2)
// - If the word is a true full match except for differences in accents or
// capitalization, then treat it as if the score was 255.
// - If before.length() == after.length()
// => multiply by mFullWordMultiplier (which is defined as 2)
// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
// For historical reasons we ignore the 1.2 modifier (because the measure for a good
// autocorrection threshold was done at a time when it didn't exist). This doesn't change
// the result.
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
// Normalizes |score| of the suggestion |after| for the typed word |before|.
// Returns 0 when either word is empty or when |after| consists only of spaces. Otherwise the
// score is divided by SUGGEST_INTERFACE_OUTPUT_SCALE and multiplied by a weight derived from
// the edit distance between the two words.
/* static */ float AutocorrectionThresholdUtils::calcNormalizedScore(const int *before,
        const int beforeLength, const int *after, const int afterLength, const int score) {
    if (beforeLength == 0 || afterLength == 0) {
        return 0.0f;
    }
    const int distance = editDistance(before, beforeLength, after, afterLength);

    // Count the code points of the suggestion that are not spaces.
    int nonSpaceCount = afterLength;
    for (int i = 0; i < afterLength; ++i) {
        if (KEYCODE_SPACE == after[i]) {
            --nonSpaceCount;
        }
    }
    if (nonSpaceCount == 0) {
        return 0.0f;
    }

    // Weight based on the edit distance: 1 - distance / afterLength.
    // NOTE(review): this stays within [0, 1] only while distance <= afterLength, which
    // presumably relies on afterLength >= beforeLength -- confirm against callers.
    const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);

    // TODO: Revise the following logic thoroughly by referring to...
    if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) {
        return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight;
    }
    // ...this logic.
    // Legacy normalization, currently unreachable: divide by the maximum possible score.
    float maxScore;
    if (score >= S_INT_MAX) {
        maxScore = static_cast<float>(S_INT_MAX);
    } else {
        maxScore = static_cast<float>(MAX_INITIAL_SCORE)
                * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
                        static_cast<float>(min(beforeLength, nonSpaceCount)))
                * static_cast<float>(FULL_WORD_MULTIPLIER);
    }
    return (static_cast<float>(score) / maxScore) * weight;
}
} // namespace latinime

View File

@ -0,0 +1,39 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
#define LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
#include "defines.h"
namespace latinime {
// Static helpers for scoring a suggestion against the typed word. The class only declares
// static members.
class AutocorrectionThresholdUtils {
public:
// Returns a normalized score for the suggestion |after| (afterLength code points) given the
// typed word |before| (beforeLength code points) and the suggestion's raw |score|.
static float calcNormalizedScore(const int *before, const int beforeLength,
const int *after, const int afterLength, const int score);
// Returns the edit distance between |before| and |after| as an int.
static int editDistance(const int *before, const int beforeLength, const int *after,
const int afterLength);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(AutocorrectionThresholdUtils);
// Constants of the legacy score formula; their values are defined in the .cpp file.
static const int MAX_INITIAL_SCORE;
static const int TYPED_LETTER_MULTIPLIER;
static const int FULL_WORD_MULTIPLIER;
};
} // namespace latinime
#endif // LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H