Merge "Tracking minor errors to categorize exact match."

This commit is contained in:
Keisuke Kuroyanagi 2013-10-21 12:32:03 +00:00 committed by Android (Google) Code Review
commit 4fd413b7e4
14 changed files with 190 additions and 71 deletions

View file

@ -57,6 +57,7 @@ LATIN_IME_CORE_SRC_FILES := \
bloom_filter.cpp \
dictionary.cpp \
digraph_utils.cpp \
error_type_utils.cpp \
multi_bigram_map.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \

View file

@ -392,24 +392,4 @@ typedef enum {
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
// ErrorType is mainly decided by CorrectionType but it is also depending on if
// the correction has really been performed or not.
typedef enum {
// Substitution, omission and transposition
ET_EDIT_CORRECTION,
// Proximity error
ET_PROXIMITY_CORRECTION,
// Completion
ET_COMPLETION,
// New word
// TODO: Remove.
// A new word error should be an edit correction error or a proximity correction error.
ET_NEW_WORD,
// Treat error as an intentional omission when the CorrectionType is omission and the node can
// be intentional omission.
ET_INTENTIONAL_OMISSION,
// Not treated as an error. Tracked for checking exact match
ET_NOT_AN_ERROR
} ErrorType;
#endif // LATINIME_DEFINES_H

View file

@ -576,7 +576,8 @@ class DicNode {
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
const bool doNormalization, const int inputSize, const ErrorType errorType) {
const bool doNormalization, const int inputSize,
const ErrorTypeUtils::ErrorType errorType) {
if (DEBUG_GEO_FULL) {
LOGI_SHOW_ADD_COST_PROP;
}

View file

@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@ -31,7 +32,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
mRawLength(0.0f), mExactMatch(true),
mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
@ -47,7 +48,7 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
mExactMatch = true;
mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@ -59,34 +60,21 @@ class DicNodeStateScoring {
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
mExactMatch = scoring->mExactMatch;
mContainingErrorTypes = scoring->mContainingErrorTypes;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
const int inputSize, const int totalInputIndex, const ErrorType errorType) {
const int inputSize, const int totalInputIndex,
const ErrorTypeUtils::ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
switch (errorType) {
case ET_EDIT_CORRECTION:
mContainingErrorTypes = mContainingErrorTypes | errorType;
if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
++mEditCorrectionCount;
mExactMatch = false;
break;
case ET_PROXIMITY_CORRECTION:
}
if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
++mProximityCorrectionCount;
mExactMatch = false;
break;
case ET_COMPLETION:
mExactMatch = false;
break;
case ET_NEW_WORD:
mExactMatch = false;
break;
case ET_INTENTIONAL_OMISSION:
mExactMatch = false;
break;
case ET_NOT_AN_ERROR:
break;
}
}
@ -182,7 +170,7 @@ class DicNodeStateScoring {
}
bool isExactMatch() const {
return mExactMatch;
return ErrorTypeUtils::isExactMatch(mContainingErrorTypes);
}
private:
@ -199,7 +187,8 @@ class DicNodeStateScoring {
float mSpatialDistance;
float mLanguageDistance;
float mRawLength;
bool mExactMatch;
// All accumulated error types so far
ErrorTypeUtils::ErrorType mContainingErrorTypes;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,

View file

@ -0,0 +1,34 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
} // namespace latinime

View file

@ -0,0 +1,69 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_ERROR_TYPE_UTILS_H
#define LATINIME_ERROR_TYPE_UTILS_H
#include <stdint.h>
#include "defines.h"
namespace latinime {
class ErrorTypeUtils {
public:
// ErrorType is mainly decided by CorrectionType but it is also depending on if
// the correction has really been performed or not.
typedef uint32_t ErrorType;
static const ErrorType NOT_AN_ERROR;
static const ErrorType MATCH_WITH_CASE_ERROR;
static const ErrorType MATCH_WITH_ACCENT_ERROR;
static const ErrorType MATCH_WITH_DIGRAPH;
// Treat error as an intentional omission when the CorrectionType is omission and the node can
// be intentional omission.
static const ErrorType INTENTIONAL_OMISSION;
// Substitution, omission and transposition
static const ErrorType EDIT_CORRECTION;
// Proximity error
static const ErrorType PROXIMITY_CORRECTION;
// Completion
static const ErrorType COMPLETION;
// New word
// TODO: Remove.
// A new word error should be an edit correction error or a proximity correction error.
static const ErrorType NEW_WORD;
// TODO: Differentiate errors.
static bool isExactMatch(const ErrorType containingErrors) {
return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
}
static bool isEditCorrectionError(const ErrorType errorType) {
return (errorType & EDIT_CORRECTION) != 0;
}
static bool isProximityCorrectionError(const ErrorType errorType) {
return (errorType & PROXIMITY_CORRECTION) != 0;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
};
} // namespace latinime
#endif // LATINIME_ERROR_TYPE_UTILS_H

View file

@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
&& sweetSpotCenterYs && sweetSpotRadii),
mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
/* proximityCharsLength */]),
mCodeToKeyMap() {
mLowerCodePointToKeyMap() {
/* Let's check the input array length here to make sure */
const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
return mKeyIndexToCodePointG[keyIndex];
return mKeyIndexToLowerCodePointG[keyIndex];
}
int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
return mKeyIndexToOriginalCodePoint[keyIndex];
}
void ProximityInfo::initializeG() {
@ -164,8 +171,9 @@ void ProximityInfo::initializeG() {
const float gapY = sweetSpotCenterY - mCenterYsG[i];
mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale);
}
mCodeToKeyMap[lowerCode] = i;
mKeyIndexToCodePointG[i] = lowerCode;
mLowerCodePointToKeyMap[lowerCode] = i;
mKeyIndexToOriginalCodePoint[i] = code;
mKeyIndexToLowerCodePointG[i] = lowerCode;
}
for (int i = 0; i < KEY_COUNT; i++) {
mKeyKeyDistancesG[i][i] = 0;

View file

@ -39,6 +39,7 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const bool isGeometric) const;
int getCodePointOf(const int keyIndex) const;
int getOriginalCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@ -76,11 +77,11 @@ class ProximityInfo {
ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
}
AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
}
AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
@ -117,9 +118,9 @@ class ProximityInfo {
// Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
hash_map_compat<int, int> mCodeToKeyMap;
int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
hash_map_compat<int, int> mLowerCodePointToKeyMap;
int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];

View file

@ -30,6 +30,12 @@
namespace latinime {
int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
const int primaryCodePoint = getPrimaryCodePointAt(index);
const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint);
return mProximityInfo->getOriginalCodePointOf(keyIndex);
}
// TODO: Remove the dependency of "isGeometric"
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,

View file

@ -65,6 +65,8 @@ class ProximityInfoState {
return getProximityCodePointsAt(index)[0];
}
int getPrimaryOriginalCodePointAt(const int index) const;
inline bool sameAsTyped(const int *word, int length) const {
if (length != mSampledInputSize) {
return false;

View file

@ -20,6 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
traverseSession, parentDicNode, dicNode, &inputStateG);
const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
traverseSession, parentDicNode, dicNode, multiBigramMap);
const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession,
parentDicNode, dicNode);
const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
traverseSession, parentDicNode, dicNode);
profile(correctionType, dicNode);
if (inputStateG.mNeedsToUpdateInputStateG) {
dicNode->updateInputIndexG(&inputStateG);

View file

@ -18,6 +18,7 @@
#define LATINIME_WEIGHTING_H
#include "defines.h"
#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@ -84,7 +85,7 @@ class Weighting {
virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
virtual ErrorType getErrorType(const CorrectionType correctionType,
virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;

View file

@ -23,39 +23,64 @@ namespace latinime {
const TypingWeighting TypingWeighting::sInstance;
ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
switch (correctionType) {
case CT_MATCH:
if (isProximityDicNode(traverseSession, dicNode)) {
return ET_PROXIMITY_CORRECTION;
return ErrorTypeUtils::PROXIMITY_CORRECTION;
} else if (dicNode->isInDigraph()) {
return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
} else {
return ET_NOT_AN_ERROR;
// Compare the node code point with original primary code point on the keyboard.
const ProximityInfoState *const pInfoState =
traverseSession->getProximityInfoState(0);
const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
dicNode->getInputIndex(0));
const int nodeCodePoint = dicNode->getNodeCodePoint();
if (primaryOriginalCodePoint == nodeCodePoint) {
// Node code point is same as original code point on the keyboard.
return ErrorTypeUtils::NOT_AN_ERROR;
} else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
CharUtils::toLowerCase(nodeCodePoint)) {
// Only cases of the code points are different.
return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
} else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
CharUtils::toBaseCodePoint(nodeCodePoint)) {
// Node code point is a variant of original code point.
return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
} else {
// Node code point is a variant of original code point and the cases are also
// different.
return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
| ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
}
}
break;
case CT_ADDITIONAL_PROXIMITY:
return ET_PROXIMITY_CORRECTION;
return ErrorTypeUtils::PROXIMITY_CORRECTION;
case CT_OMISSION:
if (parentDicNode->canBeIntentionalOmission()) {
return ET_INTENTIONAL_OMISSION;
return ErrorTypeUtils::INTENTIONAL_OMISSION;
} else {
return ET_EDIT_CORRECTION;
return ErrorTypeUtils::EDIT_CORRECTION;
}
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
return ET_EDIT_CORRECTION;
return ErrorTypeUtils::EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
return ET_NEW_WORD;
return ErrorTypeUtils::NEW_WORD;
case CT_TERMINAL:
return ET_NOT_AN_ERROR;
return ErrorTypeUtils::NOT_AN_ERROR;
case CT_COMPLETION:
return ET_COMPLETION;
return ErrorTypeUtils::COMPLETION;
default:
return ET_NOT_AN_ERROR;
return ErrorTypeUtils::NOT_AN_ERROR;
}
}
} // namespace latinime

View file

@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@ -204,7 +205,7 @@ class TypingWeighting : public Weighting {
return cost * traverseSession->getMultiWordCostMultiplier();
}
ErrorType getErrorType(const CorrectionType correctionType,
ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const;