From a09f9709e0550e2241a349c711b31f06d24671ec Mon Sep 17 00:00:00 2001
From: Keisuke Kuroynagi
Date: Tue, 7 May 2013 19:47:20 +0900
Subject: [PATCH] Checking key edit distance of candidate words for gesture scoring.

bug: 8616704
Change-Id: I98c3248877b1e3e722a6c00a2851e543e5fcbe11
---
 native/jni/src/correction.cpp                 | 47 ++---------
 ...damerau_levenshtein_edit_distance_policy.h | 79 +++++++++++++++++++
 .../suggest/policyimpl/utils/edit_distance.h  | 70 ++++++++++++++++
 .../policyimpl/utils/edit_distance_policy.h   | 43 ++++++++++
 4 files changed, 197 insertions(+), 42 deletions(-)
 create mode 100644 native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
 create mode 100644 native/jni/src/suggest/policyimpl/utils/edit_distance.h
 create mode 100644 native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h

diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 0c65939e0..61bf3f619 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -23,6 +23,8 @@
 #include "defines.h"
 #include "proximity_info_state.h"
 #include "suggest_utils.h"
+#include "suggest/policyimpl/utils/edit_distance.h"
+#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h"
 
 namespace latinime {
 
@@ -906,50 +908,11 @@ inline static bool isUpperCase(unsigned short c) {
     return totalFreq;
 }
 
-/* Damerau-Levenshtein distance */
-inline static int editDistanceInternal(int *editDistanceTable, const int *before,
-        const int beforeLength, const int *after, const int afterLength) {
-    // dp[li][lo] dp[a][b] = dp[ a * lo + b]
-    int *dp = editDistanceTable;
-    const int li = beforeLength + 1;
-    const int lo = afterLength + 1;
-    for (int i = 0; i < li; ++i) {
-        dp[lo * i] = i;
-    }
-    for (int i = 0; i < lo; ++i) {
-        dp[i] = i;
-    }
-
-    for (int i = 0; i < li - 1; ++i) {
-        for (int j = 0; j < lo - 1; ++j) {
-            const int ci = toBaseLowerCase(before[i]);
-            const int co = toBaseLowerCase(after[j]);
-            const int cost = (ci == co) ? 0 : 1;
-            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
-                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
-            if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
-                    && co == toBaseLowerCase(before[i - 1])) {
-                dp[(i + 1) * lo + (j + 1)] = min(
-                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
-            }
-        }
-    }
-
-    if (DEBUG_EDIT_DISTANCE) {
-        AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
-        for (int i = 0; i < li; ++i) {
-            for (int j = 0; j < lo; ++j) {
-                AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
-            }
-        }
-    }
-    return dp[li * lo - 1];
-}
-
 /* static */ int Correction::RankingAlgorithm::editDistance(const int *before,
         const int beforeLength, const int *after, const int afterLength) {
-    int table[(beforeLength + 1) * (afterLength + 1)];
-    return editDistanceInternal(table, before, beforeLength, after, afterLength);
+    const DamerauLevenshteinEditDistancePolicy damerauLevenshtein(
+            before, beforeLength, after, afterLength);
+    return static_cast<int>(EditDistance::getEditDistance(&damerauLevenshtein));  // float -> int
 }
 
 
diff --git a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
new file mode 100644
index 000000000..ec1457455
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DAMERAU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
+#define LATINIME_DAMERAU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
+
+#include "char_utils.h"
+#include "suggest/policyimpl/utils/edit_distance_policy.h"
+
+namespace latinime {
+// Unit-cost policy: a substitution is free when toBaseLowerCase() of both elements matches.
+class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
+ public:
+    DamerauLevenshteinEditDistancePolicy(const int *const string0, const int length0,
+            const int *const string1, const int length1)
+            : mString0(string0), mString0Length(length0), mString1(string1),
+              mString1Length(length1) {}
+    ~DamerauLevenshteinEditDistancePolicy() {}
+
+    AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
+        const int c0 = toBaseLowerCase(mString0[index0]);
+        const int c1 = toBaseLowerCase(mString1[index1]);
+        return (c0 == c1) ? 0.0f : 1.0f;
+    }
+
+    AK_FORCE_INLINE float getDeletionCost(const int index0, const int index1) const {
+        return 1.0f;
+    }
+
+    AK_FORCE_INLINE float getInsertionCost(const int index0, const int index1) const {
+        return 1.0f;
+    }
+
+    AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
+        const int c0 = toBaseLowerCase(mString0[index0]);
+        const int c1 = toBaseLowerCase(mString1[index1]);
+        if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1])
+                && c1 == toBaseLowerCase(mString0[index0 - 1])) {
+            return true;
+        }
+        return false;
+    }
+
+    AK_FORCE_INLINE float getTranspositionCost(const int index0, const int index1) const {
+        return getSubstitutionCost(index0, index1);
+    }
+
+    AK_FORCE_INLINE int getString0Length() const {
+        return mString0Length;
+    }
+
+    AK_FORCE_INLINE int getString1Length() const {
+        return mString1Length;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(DamerauLevenshteinEditDistancePolicy);
+
+    const int *const mString0;
+    const int mString0Length;
+    const int *const mString1;
+    const int mString1Length;
+};
+} // namespace latinime
+
+#endif // LATINIME_DAMERAU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance.h b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
new file mode 100644
index 000000000..cbbd66894
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EDIT_DISTANCE_H
+#define LATINIME_EDIT_DISTANCE_H
+
+#include "defines.h"
+#include "suggest/policyimpl/utils/edit_distance_policy.h"
+
+namespace latinime {
+// Computes an edit distance whose per-operation costs come from an EditDistancePolicy.
+class EditDistance {
+ public:
+    // CAVEAT: There may be a performance penalty if you need the edit distance as an integer value.
+    AK_FORCE_INLINE static float getEditDistance(const EditDistancePolicy *const policy) {
+        const int beforeLength = policy->getString0Length();
+        const int afterLength = policy->getString1Length();
+        float dp[(beforeLength + 1) * (afterLength + 1)];
+        for (int i = 0; i <= beforeLength; ++i) {
+            dp[(afterLength + 1) * i] = i * policy->getInsertionCost(i - 1, -1);
+        }
+        for (int i = 0; i <= afterLength; ++i) {
+            dp[i] = i * policy->getDeletionCost(-1, i - 1);
+        }
+        // The i > 0 && j > 0 test below keeps the (i - 1, j - 1) transposition cell inside dp.
+        for (int i = 0; i < beforeLength; ++i) {
+            for (int j = 0; j < afterLength; ++j) {
+                dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
+                        dp[(afterLength + 1) * i + (j + 1)] + policy->getInsertionCost(i, j),
+                        min(dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j),
+                                dp[(afterLength + 1) * i + j]
+                                        + policy->getSubstitutionCost(i, j)));
+                if (i > 0 && j > 0 && policy->allowTransposition(i, j)) {
+                    dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
+                            dp[(afterLength + 1) * (i + 1) + (j + 1)],
+                            dp[(afterLength + 1) * (i - 1) + (j - 1)]
+                                    + policy->getTranspositionCost(i, j));
+                }
+            }
+        }
+        if (DEBUG_EDIT_DISTANCE) {
+            AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+            for (int i = 0; i < beforeLength + 1; ++i) {
+                for (int j = 0; j < afterLength + 1; ++j) {
+                    AKLOGI("EDIT[%d][%d], %f", i, j, dp[(afterLength + 1) * i + j]);
+                }
+            }
+        }
+        return dp[(beforeLength + 1) * (afterLength + 1) - 1];
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
+};
+} // namespace latinime
+
+#endif // LATINIME_EDIT_DISTANCE_H
diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h
new file mode 100644
index 000000000..e3d1792cb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EDIT_DISTANCE_POLICY_H
+#define LATINIME_EDIT_DISTANCE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+// Cost callbacks and string lengths queried by EditDistance::getEditDistance().
+class EditDistancePolicy {
+ public:
+    virtual float getSubstitutionCost(const int index0, const int index1) const = 0;
+    virtual float getDeletionCost(const int index0, const int index1) const = 0;
+    virtual float getInsertionCost(const int index0, const int index1) const = 0;
+    virtual bool allowTransposition(const int index0, const int index1) const = 0;
+    virtual float getTranspositionCost(const int index0, const int index1) const = 0;
+    virtual int getString0Length() const = 0;
+    virtual int getString1Length() const = 0;
+
+ protected:
+    EditDistancePolicy() {}
+    virtual ~EditDistancePolicy() {}
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(EditDistancePolicy);
+};
+} // namespace latinime
+
+#endif // LATINIME_EDIT_DISTANCE_POLICY_H