am 86af3e9f
: am b90a86b3
: Merge "Checking key edit distance of candidate words for gesture scoring."
* commit '86af3e9fe46d3bae17ba8ce0adf748d741226c01': Checking key edit distance of candidate words for gesture scoring.
This commit is contained in:
commit
06579eda3f
4 changed files with 197 additions and 42 deletions
|
@ -23,6 +23,8 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "proximity_info_state.h"
|
#include "proximity_info_state.h"
|
||||||
#include "suggest_utils.h"
|
#include "suggest_utils.h"
|
||||||
|
#include "suggest/policyimpl/utils/edit_distance.h"
|
||||||
|
#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -906,50 +908,11 @@ inline static bool isUpperCase(unsigned short c) {
|
||||||
return totalFreq;
|
return totalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Damerau-Levenshtein distance */
|
|
||||||
inline static int editDistanceInternal(int *editDistanceTable, const int *before,
|
|
||||||
const int beforeLength, const int *after, const int afterLength) {
|
|
||||||
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
|
|
||||||
int *dp = editDistanceTable;
|
|
||||||
const int li = beforeLength + 1;
|
|
||||||
const int lo = afterLength + 1;
|
|
||||||
for (int i = 0; i < li; ++i) {
|
|
||||||
dp[lo * i] = i;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < lo; ++i) {
|
|
||||||
dp[i] = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < li - 1; ++i) {
|
|
||||||
for (int j = 0; j < lo - 1; ++j) {
|
|
||||||
const int ci = toBaseLowerCase(before[i]);
|
|
||||||
const int co = toBaseLowerCase(after[j]);
|
|
||||||
const int cost = (ci == co) ? 0 : 1;
|
|
||||||
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
|
|
||||||
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
|
|
||||||
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
|
|
||||||
&& co == toBaseLowerCase(before[i - 1])) {
|
|
||||||
dp[(i + 1) * lo + (j + 1)] = min(
|
|
||||||
dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DEBUG_EDIT_DISTANCE) {
|
|
||||||
AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
|
|
||||||
for (int i = 0; i < li; ++i) {
|
|
||||||
for (int j = 0; j < lo; ++j) {
|
|
||||||
AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return dp[li * lo - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ int Correction::RankingAlgorithm::editDistance(const int *before,
|
/* static */ int Correction::RankingAlgorithm::editDistance(const int *before,
|
||||||
const int beforeLength, const int *after, const int afterLength) {
|
const int beforeLength, const int *after, const int afterLength) {
|
||||||
int table[(beforeLength + 1) * (afterLength + 1)];
|
const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein(
|
||||||
return editDistanceInternal(table, before, beforeLength, after, afterLength);
|
before, beforeLength, after, afterLength);
|
||||||
|
return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
||||||
|
#define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
||||||
|
|
||||||
|
#include "char_utils.h"
|
||||||
|
#include "suggest/policyimpl/utils/edit_distance_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
|
||||||
|
public:
|
||||||
|
DamerauLevenshteinEditDistancePolicy(const int *const string0, const int length0,
|
||||||
|
const int *const string1, const int length1)
|
||||||
|
: mString0(string0), mString0Length(length0), mString1(string1),
|
||||||
|
mString1Length(length1) {}
|
||||||
|
~DamerauLevenshteinEditDistancePolicy() {}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
|
||||||
|
const int c0 = toBaseLowerCase(mString0[index0]);
|
||||||
|
const int c1 = toBaseLowerCase(mString1[index1]);
|
||||||
|
return (c0 == c1) ? 0.0f : 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE float getDeletionCost(const int index0, const int index1) const {
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE float getInsertionCost(const int index0, const int index1) const {
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
|
||||||
|
const int c0 = toBaseLowerCase(mString0[index0]);
|
||||||
|
const int c1 = toBaseLowerCase(mString1[index1]);
|
||||||
|
if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1])
|
||||||
|
&& c1 == toBaseLowerCase(mString0[index0 - 1])) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE float getTranspositionCost(const int index0, const int index1) const {
|
||||||
|
return getSubstitutionCost(index0, index1);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getString0Length() const {
|
||||||
|
return mString0Length;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getString1Length() const {
|
||||||
|
return mString1Length;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN (DamerauLevenshteinEditDistancePolicy);
|
||||||
|
|
||||||
|
const int *const mString0;
|
||||||
|
const int mString0Length;
|
||||||
|
const int *const mString1;
|
||||||
|
const int mString1Length;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
|
||||||
|
#endif // LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
70
native/jni/src/suggest/policyimpl/utils/edit_distance.h
Normal file
70
native/jni/src/suggest/policyimpl/utils/edit_distance.h
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_EDIT_DISTANCE_H
|
||||||
|
#define LATINIME_EDIT_DISTANCE_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/utils/edit_distance_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class EditDistance {
|
||||||
|
public:
|
||||||
|
// CAVEAT: There may be performance penalty if you need the edit distance as an integer value.
|
||||||
|
AK_FORCE_INLINE static float getEditDistance(const EditDistancePolicy *const policy) {
|
||||||
|
const int beforeLength = policy->getString0Length();
|
||||||
|
const int afterLength = policy->getString1Length();
|
||||||
|
float dp[(beforeLength + 1) * (afterLength + 1)];
|
||||||
|
for (int i = 0; i <= beforeLength; ++i) {
|
||||||
|
dp[(afterLength + 1) * i] = i * policy->getInsertionCost(i - 1, -1);
|
||||||
|
}
|
||||||
|
for (int i = 0; i <= afterLength; ++i) {
|
||||||
|
dp[i] = i * policy->getDeletionCost(-1, i - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < beforeLength; ++i) {
|
||||||
|
for (int j = 0; j < afterLength; ++j) {
|
||||||
|
dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
|
||||||
|
dp[(afterLength + 1) * i + (j + 1)] + policy->getInsertionCost(i, j),
|
||||||
|
min(dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j),
|
||||||
|
dp[(afterLength + 1) * i + j]
|
||||||
|
+ policy->getSubstitutionCost(i, j)));
|
||||||
|
if (policy->allowTransposition(i, j)) {
|
||||||
|
dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
|
||||||
|
dp[(afterLength + 1) * (i + 1) + (j + 1)],
|
||||||
|
dp[(afterLength + 1) * (i - 1) + (j - 1)]
|
||||||
|
+ policy->getTranspositionCost(i, j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (DEBUG_EDIT_DISTANCE) {
|
||||||
|
AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
|
||||||
|
for (int i = 0; i < beforeLength + 1; ++i) {
|
||||||
|
for (int j = 0; j < afterLength + 1; ++j) {
|
||||||
|
AKLOGI("EDIT[%d][%d], %f", i, j, dp[(afterLength + 1) * i + j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dp[(beforeLength + 1) * (afterLength + 1) - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
|
||||||
|
#endif // LATINIME_EDIT_DISTANCE_H
|
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_EDIT_DISTANCE_POLICY_H
|
||||||
|
#define LATINIME_EDIT_DISTANCE_POLICY_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class EditDistancePolicy {
|
||||||
|
public:
|
||||||
|
virtual float getSubstitutionCost(const int index0, const int index1) const = 0;
|
||||||
|
virtual float getDeletionCost(const int index0, const int index1) const = 0;
|
||||||
|
virtual float getInsertionCost(const int index0, const int index1) const = 0;
|
||||||
|
virtual bool allowTransposition(const int index0, const int index1) const = 0;
|
||||||
|
virtual float getTranspositionCost(const int index0, const int index1) const = 0;
|
||||||
|
virtual int getString0Length() const = 0;
|
||||||
|
virtual int getString1Length() const = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
EditDistancePolicy() {}
|
||||||
|
virtual ~EditDistancePolicy() {}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(EditDistancePolicy);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
|
||||||
|
#endif // LATINIME_EDIT_DISTANCE_POLICY_H
|
Loading…
Reference in a new issue