From 2fdf5b70adab1cd7eaa62319524e6c79f2cbd14b Mon Sep 17 00:00:00 2001 From: Tom Ouyang Date: Tue, 2 Apr 2013 17:21:08 -0700 Subject: [PATCH] Add DigraphUtils class Add a new DigraphUtils class to manage digraph handling for both gesture and typing input. Bug: 8493920 Change-Id: I3a509e1311b9039653f9f488d3c28bb54205f416 --- native/jni/Android.mk | 1 + native/jni/src/digraph_utils.cpp | 93 +++++++++++++++++++++++++++ native/jni/src/digraph_utils.h | 47 ++++++++++++++ native/jni/src/unigram_dictionary.cpp | 31 +++------ native/jni/src/unigram_dictionary.h | 10 +-- 5 files changed, 153 insertions(+), 29 deletions(-) create mode 100644 native/jni/src/digraph_utils.cpp create mode 100644 native/jni/src/digraph_utils.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 42275d45a..8bd996f0f 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -59,6 +59,7 @@ LATIN_IME_CORE_SRC_FILES := \ correction.cpp \ dictionary.cpp \ dic_traverse_wrapper.cpp \ + digraph_utils.cpp \ proximity_info.cpp \ proximity_info_params.cpp \ proximity_info_state.cpp \ diff --git a/native/jni/src/digraph_utils.cpp b/native/jni/src/digraph_utils.cpp new file mode 100644 index 000000000..8781c5077 --- /dev/null +++ b/native/jni/src/digraph_utils.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_format.h" +#include "defines.h" +#include "digraph_utils.h" + +namespace latinime { + +const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = + { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS + { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS + { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS +const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = + { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE + { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE + +/* static */ bool DigraphUtils::hasDigraphForCodePoint( + const int dictFlags, const int compositeGlyphCodePoint) { + if (DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint)) { + return true; + } + return false; +} + +// Retrieves the set of all digraphs associated with the given dictionary. +// Returns the size of the digraph array, or 0 if none exist. +/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize( + const int dictFlags, const DigraphUtils::digraph_t **digraphs) { + if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) { + *digraphs = DigraphUtils::GERMAN_UMLAUT_DIGRAPHS; + return NELEMS(DigraphUtils::GERMAN_UMLAUT_DIGRAPHS); + } + if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) { + *digraphs = DigraphUtils::FRENCH_LIGATURES_DIGRAPHS; + return NELEMS(DigraphUtils::FRENCH_LIGATURES_DIGRAPHS); + } + return 0; +} + +// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index +// (which specifies the first or second codepoint in the digraph). +/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int dictFlags, + const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex) { + if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) { + return NOT_A_CODE_POINT; + } + const DigraphUtils::digraph_t *digraph = + DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint); + if (!digraph) { + return NOT_A_CODE_POINT; + } + if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) { + return digraph->first; + } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) { + return digraph->second; + } + ASSERT(false); + return NOT_A_CODE_POINT; +} + +/** + * Returns the digraph for the input composite glyph codepoint, or 0 if none exists. + * dictFlags: the dictionary flags needed to determine which digraphs are supported. + * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint. + */ +/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint( + const int dictFlags, const int compositeGlyphCodePoint) { + const DigraphUtils::digraph_t *digraphs = 0; + const int digraphsSize = + DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(dictFlags, &digraphs); + for (int i = 0; i < digraphsSize; i++) { + if (digraphs[i].compositeGlyph == compositeGlyphCodePoint) { + return &digraphs[i]; + } + } + return 0; +} + +} // namespace latinime diff --git a/native/jni/src/digraph_utils.h b/native/jni/src/digraph_utils.h new file mode 100644 index 000000000..6e364b67a --- /dev/null +++ b/native/jni/src/digraph_utils.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIGRAPH_UTILS_H +#define DIGRAPH_UTILS_H + +namespace latinime { + +class DigraphUtils { + public: + typedef enum { + NOT_A_DIGRAPH_INDEX, + FIRST_DIGRAPH_CODEPOINT, + SECOND_DIGRAPH_CODEPOINT + } DigraphCodePointIndex; + + typedef struct { int first; int second; int compositeGlyph; } digraph_t; + + static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint); + static int getAllDigraphsForDictionaryAndReturnSize( + const int dictFlags, const digraph_t **digraphs); + static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint, + const DigraphCodePointIndex digraphCodePointIndex); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils); + static const digraph_t *getDigraphForCodePoint( + const int dictFlags, const int compositeGlyphCodePoint); + + static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; + static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; +}; +} // namespace latinime +#endif // DIGRAPH_UTILS_H diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 80ba412a3..33795cade 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -22,6 +22,7 @@ #include "char_utils.h" #include "defines.h" #include "dictionary.h" +#include "digraph_utils.h" #include "proximity_info.h" #include "terminal_attributes.h" #include "unigram_dictionary.h" @@ -30,15 +31,6 @@ namespace latinime { -const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] = - { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS - { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS - { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS - -const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[] = - { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE - { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE - // TODO: check the header UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags) : DICT_ROOT(streamStart), ROOT_POS(0), @@ -58,7 +50,7 @@ static void addWord(int *word, int length, int probability, WordsPriorityQueue * // Return the replacement code point for a digraph, or 0 if none. int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize, - const digraph_t *const digraphs, const unsigned int digraphsSize) const { + const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const { // There can't be a digraph if we don't have at least 2 characters to examine if (i + 2 > inputSize) return false; @@ -74,7 +66,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons // It's an interesting digraph if the second char matches too. if (digraphs[lastDigraphIndex].second == codes[i + 1]) { - return digraphs[lastDigraphIndex].replacement; + return digraphs[lastDigraphIndex].compositeGlyph; } else { return 0; } @@ -93,7 +85,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, - const digraph_t *const digraphs, const unsigned int digraphsSize) const { + const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const { ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0])); ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0])); ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0])); @@ -169,7 +161,10 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x queuePool.clearAll(); Correction masterCorrection; masterCorrection.resetCorrection(); - if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS) + const DigraphUtils::digraph_t *digraphs = 0; + const int digraphsSize = + DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(FLAGS, &digraphs); + if (digraphsSize > 0) { // Incrementally tune the word and try all possibilities int codesBuffer[sizeof(*inputCodePoints) * inputSize]; int xCoordinatesBuffer[inputSize]; @@ -177,15 +172,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter, useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection, - &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS)); - } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { - int codesBuffer[sizeof(*inputCodePoints) * inputSize]; - int xCoordinatesBuffer[inputSize]; - int yCoordinatesBuffer[inputSize]; - getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter, - useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection, - &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS)); + &queuePool, digraphs, digraphsSize); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize, bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool); diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index c1955e8bb..1a01758fe 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -20,6 +20,7 @@ #include #include #include "defines.h" +#include "digraph_utils.h" namespace latinime { @@ -29,8 +30,6 @@ class TerminalAttributes; class WordsPriorityQueuePool; class UnigramDictionary { - typedef struct { int first; int second; int replacement; } digraph_t; - public: // Error tolerances static const int DEFAULT_MAX_ERRORS = 2; @@ -57,13 +56,13 @@ class UnigramDictionary { const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const; int getDigraphReplacement(const int *codes, const int i, const int inputSize, - const digraph_t *const digraphs, const unsigned int digraphsSize) const; + const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize, const std::map *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, - WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, + WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const; void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputSize, @@ -111,9 +110,6 @@ class UnigramDictionary { const int ROOT_POS; const int MAX_DIGRAPH_SEARCH_DEPTH; const int FLAGS; - - static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; - static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; }; } // namespace latinime #endif // LATINIME_UNIGRAM_DICTIONARY_H