Move char_utils to the dictionary directory
Change-Id: Id397485407fe63600c9bb5c80d223042942b4d8a
main
parent
70c2dcffa3
commit
464d3ba432
|
@ -47,7 +47,6 @@ LATIN_IME_JNI_SRC_FILES := \
|
||||||
|
|
||||||
LATIN_IME_CORE_SRC_FILES := \
|
LATIN_IME_CORE_SRC_FILES := \
|
||||||
bigram_dictionary.cpp \
|
bigram_dictionary.cpp \
|
||||||
char_utils.cpp \
|
|
||||||
correction.cpp \
|
correction.cpp \
|
||||||
dic_traverse_wrapper.cpp \
|
dic_traverse_wrapper.cpp \
|
||||||
unigram_dictionary.cpp \
|
unigram_dictionary.cpp \
|
||||||
|
@ -58,6 +57,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dic_node_utils.cpp \
|
dic_node_utils.cpp \
|
||||||
dic_nodes_cache.cpp) \
|
dic_nodes_cache.cpp) \
|
||||||
$(addprefix suggest/core/dictionary/, \
|
$(addprefix suggest/core/dictionary/, \
|
||||||
|
char_utils.cpp \
|
||||||
dictionary.cpp \
|
dictionary.cpp \
|
||||||
digraph_utils.cpp) \
|
digraph_utils.cpp) \
|
||||||
$(addprefix suggest/core/layout/, \
|
$(addprefix suggest/core/layout/, \
|
||||||
|
|
|
@ -20,10 +20,10 @@
|
||||||
|
|
||||||
#include "bigram_dictionary.h"
|
#include "bigram_dictionary.h"
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
#include "suggest/core/dictionary/binary_format.h"
|
||||||
#include "suggest/core/dictionary/bloom_filter.h"
|
#include "suggest/core/dictionary/bloom_filter.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -52,7 +52,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
|
||||||
int insertAt = 0;
|
int insertAt = 0;
|
||||||
while (insertAt < MAX_RESULTS) {
|
while (insertAt < MAX_RESULTS) {
|
||||||
if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability
|
if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability
|
||||||
&& length < getCodePointCount(MAX_WORD_LENGTH,
|
&& length < CharUtils::getCodePointCount(MAX_WORD_LENGTH,
|
||||||
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
|
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -196,9 +196,9 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons
|
||||||
// what user typed.
|
// what user typed.
|
||||||
|
|
||||||
int maxAlt = MAX_ALTERNATIVES;
|
int maxAlt = MAX_ALTERNATIVES;
|
||||||
const int firstBaseLowerCodePoint = toBaseLowerCase(*word);
|
const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
|
||||||
while (maxAlt > 0) {
|
while (maxAlt > 0) {
|
||||||
if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
|
if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
inputCodePoints++;
|
inputCodePoints++;
|
||||||
|
|
|
@ -1,88 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2010 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_CHAR_UTILS_H
|
|
||||||
#define LATINIME_CHAR_UTILS_H
|
|
||||||
|
|
||||||
#include <cctype>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
inline static bool isAsciiUpper(int c) {
|
|
||||||
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
|
|
||||||
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
|
|
||||||
return (c >= 'A' && c <= 'Z');
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static int toAsciiLower(int c) {
|
|
||||||
return c - 'A' + 'a';
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static bool isAscii(int c) {
|
|
||||||
return isascii(c) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned short latin_tolower(const unsigned short c);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Table mapping most combined Latin, Greek, and Cyrillic characters
|
|
||||||
* to their base characters. If c is in range, BASE_CHARS[c] == c
|
|
||||||
* if c is not a combined character, or the base character if it
|
|
||||||
* is combined.
|
|
||||||
*/
|
|
||||||
static const int BASE_CHARS_SIZE = 0x0500;
|
|
||||||
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
|
|
||||||
|
|
||||||
inline static int toBaseCodePoint(int c) {
|
|
||||||
if (c < BASE_CHARS_SIZE) {
|
|
||||||
return static_cast<int>(BASE_CHARS[c]);
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE static int toLowerCase(const int c) {
|
|
||||||
if (isAsciiUpper(c)) {
|
|
||||||
return toAsciiLower(c);
|
|
||||||
}
|
|
||||||
if (isAscii(c)) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
|
|
||||||
return toLowerCase(toBaseCodePoint(c));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static bool isIntentionalOmissionCodePoint(const int codePoint) {
|
|
||||||
// TODO: Do not hardcode here
|
|
||||||
return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static int getCodePointCount(const int arraySize, const int *const codePoints) {
|
|
||||||
int size = 0;
|
|
||||||
for (; size < arraySize; ++size) {
|
|
||||||
if (codePoints[size] == '\0') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_CHAR_UTILS_H
|
|
|
@ -18,9 +18,9 @@
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "correction.h"
|
#include "correction.h"
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/proximity_info_state.h"
|
#include "suggest/core/layout/proximity_info_state.h"
|
||||||
#include "suggest/core/layout/touch_position_correction_utils.h"
|
#include "suggest/core/layout/touch_position_correction_utils.h"
|
||||||
#include "suggest/policyimpl/utils/edit_distance.h"
|
#include "suggest/policyimpl/utils/edit_distance.h"
|
||||||
|
@ -528,7 +528,7 @@ inline static int getQuoteCount(const int *word, const int length) {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static bool isUpperCase(unsigned short c) {
|
inline static bool isUpperCase(unsigned short c) {
|
||||||
return isAsciiUpper(toBaseCodePoint(c));
|
return CharUtils::isAsciiUpper(CharUtils::toBaseCodePoint(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include "correction_state.h"
|
#include "correction_state.h"
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/proximity_info_state.h"
|
#include "suggest/core/layout/proximity_info_state.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -342,13 +343,13 @@ AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, cons
|
||||||
const int *const prevprev =
|
const int *const prevprev =
|
||||||
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
|
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
|
||||||
current[0] = outputLength;
|
current[0] = outputLength;
|
||||||
const int co = toBaseLowerCase(output[outputLength - 1]);
|
const int co = CharUtils::toBaseLowerCase(output[outputLength - 1]);
|
||||||
const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
|
const int prevCO = outputLength >= 2 ? CharUtils::toBaseLowerCase(output[outputLength - 2]) : 0;
|
||||||
for (int i = 1; i <= inputSize; ++i) {
|
for (int i = 1; i <= inputSize; ++i) {
|
||||||
const int ci = toBaseLowerCase(input[i - 1]);
|
const int ci = CharUtils::toBaseLowerCase(input[i - 1]);
|
||||||
const int cost = (ci == co) ? 0 : 1;
|
const int cost = (ci == co) ? 0 : 1;
|
||||||
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
|
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
|
||||||
if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
|
if (i >= 2 && prevprev && ci == prevCO && co == CharUtils::toBaseLowerCase(input[i - 2])) {
|
||||||
current[i] = min(current[i], prevprev[i - 2] + 1);
|
current[i] = min(current[i], prevprev[i - 2] + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,12 +17,12 @@
|
||||||
#ifndef LATINIME_DIC_NODE_H
|
#ifndef LATINIME_DIC_NODE_H
|
||||||
#define LATINIME_DIC_NODE_H
|
#define LATINIME_DIC_NODE_H
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dicnode/dic_node_state.h"
|
#include "suggest/core/dicnode/dic_node_state.h"
|
||||||
#include "suggest/core/dicnode/dic_node_profiler.h"
|
#include "suggest/core/dicnode/dic_node_profiler.h"
|
||||||
#include "suggest/core/dicnode/dic_node_properties.h"
|
#include "suggest/core/dicnode/dic_node_properties.h"
|
||||||
#include "suggest/core/dicnode/dic_node_release_listener.h"
|
#include "suggest/core/dicnode/dic_node_release_listener.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/dictionary/digraph_utils.h"
|
#include "suggest/core/dictionary/digraph_utils.h"
|
||||||
|
|
||||||
#if DEBUG_DICT
|
#if DEBUG_DICT
|
||||||
|
@ -221,7 +221,7 @@ class DicNode {
|
||||||
|
|
||||||
bool isFirstCharUppercase() const {
|
bool isFirstCharUppercase() const {
|
||||||
const int c = getOutputWordBuf()[0];
|
const int c = getOutputWordBuf()[0];
|
||||||
return isAsciiUpper(c);
|
return CharUtils::isAsciiUpper(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isFirstWord() const {
|
bool isFirstWord() const {
|
||||||
|
@ -375,7 +375,7 @@ class DicNode {
|
||||||
// Whether the current codepoint can be an intentional omission, in which case the traversal
|
// Whether the current codepoint can be an intentional omission, in which case the traversal
|
||||||
// algorithm will always check for a possible omission here.
|
// algorithm will always check for a possible omission here.
|
||||||
bool canBeIntentionalOmission() const {
|
bool canBeIntentionalOmission() const {
|
||||||
return isIntentionalOmissionCodePoint(getNodeCodePoint());
|
return CharUtils::isIntentionalOmissionCodePoint(getNodeCodePoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Whether the omission is so frequent that it should incur zero cost.
|
// Whether the omission is so frequent that it should incur zero cost.
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "suggest/core/dicnode/dic_node_utils.h"
|
#include "suggest/core/dicnode/dic_node_utils.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
#include "suggest/core/dictionary/binary_format.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/dictionary/multi_bigram_map.h"
|
#include "suggest/core/dictionary/multi_bigram_map.h"
|
||||||
#include "suggest/core/layout/proximity_info.h"
|
#include "suggest/core/layout/proximity_info.h"
|
||||||
#include "suggest/core/layout/proximity_info_state.h"
|
#include "suggest/core/layout/proximity_info_state.h"
|
||||||
|
@ -62,9 +63,9 @@ namespace latinime {
|
||||||
DicNodeVector *childDicNodes) {
|
DicNodeVector *childDicNodes) {
|
||||||
// Passing multiple chars node. No need to traverse child
|
// Passing multiple chars node. No need to traverse child
|
||||||
const int codePoint = dicNode->getNodeTypedCodePoint();
|
const int codePoint = dicNode->getNodeTypedCodePoint();
|
||||||
const int baseLowerCaseCodePoint = toBaseLowerCase(codePoint);
|
const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint);
|
||||||
const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
|
const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
|
||||||
if (isMatch || isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
|
if (isMatch || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
|
||||||
childDicNodes->pushPassingChild(dicNode);
|
childDicNodes->pushPassingChild(dicNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -125,13 +126,13 @@ namespace latinime {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
|
if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
|
||||||
|| isIntentionalOmissionCodePoint(nodeCodePoint))) {
|
|| CharUtils::isIntentionalOmissionCodePoint(nodeCodePoint))) {
|
||||||
// If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
|
// If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
|
||||||
// filtered.
|
// filtered.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int lowerCodePoint = toLowerCase(nodeCodePoint);
|
const int lowerCodePoint = CharUtils::toLowerCase(nodeCodePoint);
|
||||||
const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
|
const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
|
||||||
// TODO: Avoid linear search
|
// TODO: Avoid linear search
|
||||||
for (int i = 0; i < filterSize; ++i) {
|
for (int i = 0; i < filterSize; ++i) {
|
||||||
// Checking if a normalized code point is in filter characters when pInfo is not
|
// Checking if a normalized code point is in filter characters when pInfo is not
|
||||||
|
|
|
@ -21,9 +21,9 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "hash_map_compat.h"
|
#include "hash_map_compat.h"
|
||||||
#include "suggest/core/dictionary/bloom_filter.h"
|
#include "suggest/core/dictionary/bloom_filter.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -473,7 +473,8 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
// there was no match (or we would have found it).
|
// there was no match (or we would have found it).
|
||||||
if (wordPos >= length) return NOT_VALID_WORD;
|
if (wordPos >= length) return NOT_VALID_WORD;
|
||||||
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
||||||
const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
const int wChar = forceLowerCaseSearch
|
||||||
|
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
||||||
while (true) {
|
while (true) {
|
||||||
// If there are no more character groups in this node, it means we could not
|
// If there are no more character groups in this node, it means we could not
|
||||||
// find a matching character for this depth, therefore there is no match.
|
// find a matching character for this depth, therefore there is no match.
|
||||||
|
|
|
@ -14,9 +14,10 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -36,8 +37,7 @@ struct LatinCapitalSmallPair {
|
||||||
* $ apt-get install libicu-dev
|
* $ apt-get install libicu-dev
|
||||||
*
|
*
|
||||||
* 3. Build the following code
|
* 3. Build the following code
|
||||||
* (You need this file, char_utils.h, and defines.h)
|
* $ g++ -o char_utils -I../../.. -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc
|
||||||
* $ g++ -o char_utils -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc
|
|
||||||
*/
|
*/
|
||||||
#ifdef UPDATING_CHAR_UTILS
|
#ifdef UPDATING_CHAR_UTILS
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -47,7 +47,7 @@ extern "C" int main() {
|
||||||
for (unsigned short c = 0; c < 0xFFFF; c++) {
|
for (unsigned short c = 0; c < 0xFFFF; c++) {
|
||||||
if (c <= 0x7F) continue;
|
if (c <= 0x7F) continue;
|
||||||
const unsigned short icu4cLowerC = u_tolower(c);
|
const unsigned short icu4cLowerC = u_tolower(c);
|
||||||
const unsigned short myLowerC = latin_tolower(c);
|
const unsigned short myLowerC = CharUtils::latin_tolower(c);
|
||||||
if (c != icu4cLowerC) {
|
if (c != icu4cLowerC) {
|
||||||
#ifdef CONFIRMING_CHAR_UTILS
|
#ifdef CONFIRMING_CHAR_UTILS
|
||||||
if (icu4cLowerC != myLowerC) {
|
if (icu4cLowerC != myLowerC) {
|
||||||
|
@ -70,7 +70,8 @@ extern "C" int main() {
|
||||||
*
|
*
|
||||||
* 5. Update the SORTED_CHAR_MAP[] array below with the output above.
|
* 5. Update the SORTED_CHAR_MAP[] array below with the output above.
|
||||||
* Then, rebuild with -DCONFIRMING_CHAR_UTILS and confirm the program exits successfully.
|
* Then, rebuild with -DCONFIRMING_CHAR_UTILS and confirm the program exits successfully.
|
||||||
* $ g++ -o char_utils -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp -licuuc
|
* $ g++ -o char_utils -I../../.. -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp \
|
||||||
|
* -licuuc
|
||||||
* $ ./char_utils
|
* $ ./char_utils
|
||||||
* $
|
* $
|
||||||
*/
|
*/
|
||||||
|
@ -1054,7 +1055,7 @@ static int compare_pair_capital(const void *a, const void *b) {
|
||||||
- static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
|
- static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned short latin_tolower(const unsigned short c) {
|
/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) {
|
||||||
struct LatinCapitalSmallPair *p =
|
struct LatinCapitalSmallPair *p =
|
||||||
static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
|
static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
|
||||||
NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
|
NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
|
||||||
|
@ -1063,7 +1064,7 @@ unsigned short latin_tolower(const unsigned short c) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Table mapping most combined Latin, Greek, and Cyrillic characters
|
* Table mapping most combined Latin, Greek, and Cyrillic characters
|
||||||
* to their base characters. If c is in range, BASE_CHARS[c] == c
|
* to their base characters. If c is in range, CharUtils::BASE_CHARS[c] == c
|
||||||
* if c is not a combined character, or the base character if it
|
* if c is not a combined character, or the base character if it
|
||||||
* is combined.
|
* is combined.
|
||||||
*
|
*
|
||||||
|
@ -1074,7 +1075,7 @@ unsigned short latin_tolower(const unsigned short c) {
|
||||||
* for ($j = $i; $j < $i + 8; $j++) { \
|
* for ($j = $i; $j < $i + 8; $j++) { \
|
||||||
* printf("0x%04X, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
|
* printf("0x%04X, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
|
||||||
*/
|
*/
|
||||||
const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
|
/* static */ const unsigned short CharUtils::BASE_CHARS[CharUtils::BASE_CHARS_SIZE] = {
|
||||||
/* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
/* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||||
/* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
|
/* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
|
||||||
/* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
/* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
|
@ -0,0 +1,93 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2010 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_CHAR_UTILS_H
|
||||||
|
#define LATINIME_CHAR_UTILS_H
|
||||||
|
|
||||||
|
#include <cctype>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class CharUtils {
|
||||||
|
public:
|
||||||
|
static AK_FORCE_INLINE bool isAsciiUpper(int c) {
|
||||||
|
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
|
||||||
|
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
|
||||||
|
return (c >= 'A' && c <= 'Z');
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE int toAsciiLower(int c) {
|
||||||
|
return c - 'A' + 'a';
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE bool isAscii(int c) {
|
||||||
|
return isascii(c) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE int toLowerCase(const int c) {
|
||||||
|
if (isAsciiUpper(c)) {
|
||||||
|
return toAsciiLower(c);
|
||||||
|
}
|
||||||
|
if (isAscii(c)) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
|
||||||
|
return toLowerCase(toBaseCodePoint(c));
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoint) {
|
||||||
|
// TODO: Do not hardcode here
|
||||||
|
return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
|
||||||
|
int size = 0;
|
||||||
|
for (; size < arraySize; ++size) {
|
||||||
|
if (codePoints[size] == '\0') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE int toBaseCodePoint(int c) {
|
||||||
|
if (c < BASE_CHARS_SIZE) {
|
||||||
|
return static_cast<int>(BASE_CHARS[c]);
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned short latin_tolower(const unsigned short c);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Table mapping most combined Latin, Greek, and Cyrillic characters
|
||||||
|
* to their base characters. If c is in range, BASE_CHARS[c] == c
|
||||||
|
* if c is not a combined character, or the base character if it
|
||||||
|
* is combined.
|
||||||
|
*/
|
||||||
|
static const int BASE_CHARS_SIZE = 0x0500;
|
||||||
|
static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_CHAR_UTILS_H
|
|
@ -16,9 +16,9 @@
|
||||||
|
|
||||||
#include "suggest/core/dictionary/digraph_utils.h"
|
#include "suggest/core/dictionary/digraph_utils.h"
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
#include "suggest/core/dictionary/binary_format.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -122,7 +122,7 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
|
||||||
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
|
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
|
||||||
const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
|
const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
|
||||||
const DigraphUtils::digraph_t *digraphs = 0;
|
const DigraphUtils::digraph_t *digraphs = 0;
|
||||||
const int compositeGlyphLowerCodePoint = toLowerCase(compositeGlyphCodePoint);
|
const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
|
||||||
const int digraphsSize =
|
const int digraphsSize =
|
||||||
DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs);
|
DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs);
|
||||||
for (int i = 0; i < digraphsSize; i++) {
|
for (int i = 0; i < digraphsSize; i++) {
|
||||||
|
|
|
@ -21,9 +21,9 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/additional_proximity_chars.h"
|
#include "suggest/core/layout/additional_proximity_chars.h"
|
||||||
#include "suggest/core/layout/geometry_utils.h"
|
#include "suggest/core/layout/geometry_utils.h"
|
||||||
#include "suggest/core/layout/proximity_info_params.h"
|
#include "suggest/core/layout/proximity_info_params.h"
|
||||||
|
@ -165,7 +165,7 @@ void ProximityInfo::initializeG() {
|
||||||
// TODO: Optimize
|
// TODO: Optimize
|
||||||
for (int i = 0; i < KEY_COUNT; ++i) {
|
for (int i = 0; i < KEY_COUNT; ++i) {
|
||||||
const int code = mKeyCodePoints[i];
|
const int code = mKeyCodePoints[i];
|
||||||
const int lowerCode = toLowerCase(code);
|
const int lowerCode = CharUtils::toLowerCase(code);
|
||||||
mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
|
mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
|
||||||
mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
|
mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
|
||||||
mCodeToKeyMap[lowerCode] = i;
|
mCodeToKeyMap[lowerCode] = i;
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/geometry_utils.h"
|
#include "suggest/core/layout/geometry_utils.h"
|
||||||
#include "suggest/core/layout/proximity_info.h"
|
#include "suggest/core/layout/proximity_info.h"
|
||||||
#include "suggest/core/layout/proximity_info_state_utils.h"
|
#include "suggest/core/layout/proximity_info_state_utils.h"
|
||||||
|
@ -175,7 +176,7 @@ float ProximityInfoState::getPointToKeyLength(
|
||||||
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
|
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
|
||||||
return min(mSampledNormalizedSquaredLengthCache[index], mMaxPointToKeyLength);
|
return min(mSampledNormalizedSquaredLengthCache[index], mMaxPointToKeyLength);
|
||||||
}
|
}
|
||||||
if (isIntentionalOmissionCodePoint(codePoint)) {
|
if (CharUtils::isIntentionalOmissionCodePoint(codePoint)) {
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
}
|
}
|
||||||
// If the char is not a key on the keyboard then return the max length.
|
// If the char is not a key on the keyboard then return the max length.
|
||||||
|
@ -203,7 +204,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
|
||||||
const bool checkProximityChars, int *proximityIndex) const {
|
const bool checkProximityChars, int *proximityIndex) const {
|
||||||
const int *currentCodePoints = getProximityCodePointsAt(index);
|
const int *currentCodePoints = getProximityCodePointsAt(index);
|
||||||
const int firstCodePoint = currentCodePoints[0];
|
const int firstCodePoint = currentCodePoints[0];
|
||||||
const int baseLowerC = toBaseLowerCase(codePoint);
|
const int baseLowerC = CharUtils::toBaseLowerCase(codePoint);
|
||||||
|
|
||||||
// The first char in the array is what user typed. If it matches right away, that means the
|
// The first char in the array is what user typed. If it matches right away, that means the
|
||||||
// user typed that same char for this pos.
|
// user typed that same char for this pos.
|
||||||
|
@ -215,7 +216,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
|
||||||
|
|
||||||
// If the non-accented, lowercased version of that first character matches c, then we have a
|
// If the non-accented, lowercased version of that first character matches c, then we have a
|
||||||
// non-accented version of the accented character the user typed. Treat it as a close char.
|
// non-accented version of the accented character the user typed. Treat it as a close char.
|
||||||
if (toBaseLowerCase(firstCodePoint) == baseLowerC) {
|
if (CharUtils::toBaseLowerCase(firstCodePoint) == baseLowerC) {
|
||||||
return PROXIMITY_CHAR;
|
return PROXIMITY_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,8 +258,8 @@ ProximityType ProximityInfoState::getProximityTypeG(const int index, const int c
|
||||||
if (!isUsed()) {
|
if (!isUsed()) {
|
||||||
return UNRELATED_CHAR;
|
return UNRELATED_CHAR;
|
||||||
}
|
}
|
||||||
const int lowerCodePoint = toLowerCase(codePoint);
|
const int lowerCodePoint = CharUtils::toLowerCase(codePoint);
|
||||||
const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
|
const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
|
||||||
for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
|
for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
|
||||||
if (mSampledSearchKeyVectors[index][i] == lowerCodePoint
|
if (mSampledSearchKeyVectors[index][i] == lowerCodePoint
|
||||||
|| mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) {
|
|| mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) {
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#include <cstring> // for memset()
|
#include <cstring> // for memset()
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "hash_map_compat.h"
|
#include "hash_map_compat.h"
|
||||||
#include "suggest/core/layout/proximity_info_params.h"
|
#include "suggest/core/layout/proximity_info_params.h"
|
||||||
|
|
|
@ -19,9 +19,9 @@
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "hash_map_compat.h"
|
#include "hash_map_compat.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/additional_proximity_chars.h"
|
#include "suggest/core/layout/additional_proximity_chars.h"
|
||||||
#include "suggest/core/layout/geometry_utils.h"
|
#include "suggest/core/layout/geometry_utils.h"
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ class ProximityInfoUtils {
|
||||||
if (c == NOT_A_CODE_POINT) {
|
if (c == NOT_A_CODE_POINT) {
|
||||||
return NOT_AN_INDEX;
|
return NOT_AN_INDEX;
|
||||||
}
|
}
|
||||||
const int lowerCode = toLowerCase(c);
|
const int lowerCode = CharUtils::toLowerCase(c);
|
||||||
hash_map_compat<int, int>::const_iterator mapPos = codeToKeyMap->find(lowerCode);
|
hash_map_compat<int, int>::const_iterator mapPos = codeToKeyMap->find(lowerCode);
|
||||||
if (mapPos != codeToKeyMap->end()) {
|
if (mapPos != codeToKeyMap->end()) {
|
||||||
return mapPos->second;
|
return mapPos->second;
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
|
|
||||||
#include "suggest/core/policy/weighting.h"
|
#include "suggest/core/policy/weighting.h"
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_profiler.h"
|
#include "suggest/core/dicnode/dic_node_profiler.h"
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
|
|
||||||
#include "suggest/core/suggest.h"
|
#include "suggest/core/suggest.h"
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_priority_queue.h"
|
#include "suggest/core/dicnode/dic_node_priority_queue.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
|
|
@ -19,10 +19,10 @@
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/proximity_info_state.h"
|
#include "suggest/core/layout/proximity_info_state.h"
|
||||||
#include "suggest/core/policy/traversal.h"
|
#include "suggest/core/policy/traversal.h"
|
||||||
#include "suggest/core/session/dic_traverse_session.h"
|
#include "suggest/core/session/dic_traverse_session.h"
|
||||||
|
@ -64,9 +64,9 @@ class TypingTraversal : public Traversal {
|
||||||
}
|
}
|
||||||
const int point0Index = dicNode->getInputIndex(0);
|
const int point0Index = dicNode->getInputIndex(0);
|
||||||
const int currentBaseLowerCodePoint =
|
const int currentBaseLowerCodePoint =
|
||||||
toBaseLowerCase(childDicNode->getNodeCodePoint());
|
CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
|
||||||
const int typedBaseLowerCodePoint =
|
const int typedBaseLowerCodePoint =
|
||||||
toBaseLowerCase(traverseSession->getProximityInfoState(0)
|
CharUtils::toBaseLowerCase(traverseSession->getProximityInfoState(0)
|
||||||
->getPrimaryCodePointAt(point0Index));
|
->getPrimaryCodePointAt(point0Index));
|
||||||
return (currentBaseLowerCodePoint != typedBaseLowerCodePoint);
|
return (currentBaseLowerCodePoint != typedBaseLowerCodePoint);
|
||||||
}
|
}
|
||||||
|
@ -172,7 +172,7 @@ class TypingTraversal : public Traversal {
|
||||||
}
|
}
|
||||||
const int c = dicNode->getOutputWordBuf()[0];
|
const int c = dicNode->getOutputWordBuf()[0];
|
||||||
const bool shortCappedWord = dicNode->getDepth()
|
const bool shortCappedWord = dicNode->getDepth()
|
||||||
< ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && isAsciiUpper(c);
|
< ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && CharUtils::isAsciiUpper(c);
|
||||||
return !shortCappedWord
|
return !shortCappedWord
|
||||||
|| probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
|
|| probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dicnode/dic_node_utils.h"
|
#include "suggest/core/dicnode/dic_node_utils.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/layout/touch_position_correction_utils.h"
|
#include "suggest/core/layout/touch_position_correction_utils.h"
|
||||||
#include "suggest/core/policy/weighting.h"
|
#include "suggest/core/policy/weighting.h"
|
||||||
#include "suggest/core/session/dic_traverse_session.h"
|
#include "suggest/core/session/dic_traverse_session.h"
|
||||||
|
@ -98,9 +99,9 @@ class TypingWeighting : public Weighting {
|
||||||
bool isProximityDicNode(const DicTraverseSession *const traverseSession,
|
bool isProximityDicNode(const DicTraverseSession *const traverseSession,
|
||||||
const DicNode *const dicNode) const {
|
const DicNode *const dicNode) const {
|
||||||
const int pointIndex = dicNode->getInputIndex(0);
|
const int pointIndex = dicNode->getInputIndex(0);
|
||||||
const int primaryCodePoint = toBaseLowerCase(
|
const int primaryCodePoint = CharUtils::toBaseLowerCase(
|
||||||
traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex));
|
traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex));
|
||||||
const int dicNodeChar = toBaseLowerCase(dicNode->getNodeCodePoint());
|
const int dicNodeChar = CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint());
|
||||||
return primaryCodePoint != dicNodeChar;
|
return primaryCodePoint != dicNodeChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
#ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
#ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
||||||
#define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
#define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
|
||||||
|
|
||||||
#include "char_utils.h"
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/policyimpl/utils/edit_distance_policy.h"
|
#include "suggest/policyimpl/utils/edit_distance_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -31,8 +31,8 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
|
||||||
~DamerauLevenshteinEditDistancePolicy() {}
|
~DamerauLevenshteinEditDistancePolicy() {}
|
||||||
|
|
||||||
AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
|
AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
|
||||||
const int c0 = toBaseLowerCase(mString0[index0]);
|
const int c0 = CharUtils::toBaseLowerCase(mString0[index0]);
|
||||||
const int c1 = toBaseLowerCase(mString1[index1]);
|
const int c1 = CharUtils::toBaseLowerCase(mString1[index1]);
|
||||||
return (c0 == c1) ? 0.0f : 1.0f;
|
return (c0 == c1) ? 0.0f : 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,10 +45,10 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
|
AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
|
||||||
const int c0 = toBaseLowerCase(mString0[index0]);
|
const int c0 = CharUtils::toBaseLowerCase(mString0[index0]);
|
||||||
const int c1 = toBaseLowerCase(mString1[index1]);
|
const int c1 = CharUtils::toBaseLowerCase(mString1[index1]);
|
||||||
if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1])
|
if (index0 > 0 && index1 > 0 && c0 == CharUtils::toBaseLowerCase(mString1[index1 - 1])
|
||||||
&& c1 == toBaseLowerCase(mString0[index0 - 1])) {
|
&& c1 == CharUtils::toBaseLowerCase(mString0[index0 - 1])) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -18,9 +18,9 @@
|
||||||
|
|
||||||
#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
|
#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
|
||||||
|
|
||||||
#include "char_utils.h"
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
#include "suggest/core/dictionary/binary_format.h"
|
||||||
|
#include "suggest/core/dictionary/char_utils.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
#include "suggest/core/dictionary/digraph_utils.h"
|
#include "suggest/core/dictionary/digraph_utils.h"
|
||||||
#include "suggest/core/dictionary/terminal_attributes.h"
|
#include "suggest/core/dictionary/terminal_attributes.h"
|
||||||
|
@ -696,8 +696,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
||||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
int pos = startPos;
|
int pos = startPos;
|
||||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
int baseChar = toBaseLowerCase(codePoint);
|
int baseChar = CharUtils::toBaseLowerCase(codePoint);
|
||||||
const int wChar = toBaseLowerCase(inWord[startInputIndex]);
|
const int wChar = CharUtils::toBaseLowerCase(inWord[startInputIndex]);
|
||||||
|
|
||||||
if (baseChar != wChar) {
|
if (baseChar != wChar) {
|
||||||
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
|
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
|
||||||
|
@ -709,8 +709,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
||||||
if (hasMultipleChars) {
|
if (hasMultipleChars) {
|
||||||
codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
while (NOT_A_CODE_POINT != codePoint) {
|
while (NOT_A_CODE_POINT != codePoint) {
|
||||||
baseChar = toBaseLowerCase(codePoint);
|
baseChar = CharUtils::toBaseLowerCase(codePoint);
|
||||||
if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
|
if (inputIndex + 1 >= inputSize
|
||||||
|
|| CharUtils::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
|
||||||
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
|
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||||
*outInputIndex = startInputIndex;
|
*outInputIndex = startInputIndex;
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in New Issue