LatinIME/native/jni/src/digraph_utils.cpp
Tom Ouyang 3970352ea9 Make digraph lookups case insensitive
Example: Be able to gesture dictionary words that start with an upper-case
German umlaut (e.g., Übermensch).

Bug: 8493920
Change-Id: I3253bcd236d34c0af60116e5b41e359f2715bf97
2013-04-16 20:08:42 +09:00

135 lines
5.6 KiB
C++

/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "char_utils.h"
#include "binary_format.h"
#include "defines.h"
#include "digraph_utils.h"
namespace latinime {
// Two-letter spellings accepted in place of German umlauts. Each entry lists the two
// plain letters followed by the composite glyph they stand for (presumably matching the
// first / second / compositeGlyph member order of digraph_t -- confirm in digraph_utils.h).
// Only lower-case composite glyphs are listed.
const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = {
    // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
    { 'a', 'e', 0x00E4 },
    // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
    { 'o', 'e', 0x00F6 },
    // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
    { 'u', 'e', 0x00FC }
};
// Two-letter spellings accepted in place of French ligatures. Each entry lists the two
// plain letters followed by the composite glyph they stand for (presumably matching the
// first / second / compositeGlyph member order of digraph_t -- confirm in digraph_utils.h).
// Only lower-case composite glyphs are listed.
const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = {
    // U+00E6 : LATIN SMALL LETTER AE
    { 'a', 'e', 0x00E6 },
    // U+0153 : LATIN SMALL LIGATURE OE
    { 'o', 'e', 0x0153 }
};
// Every digraph type that has a lookup table in this file. getDigraphForCodePoint() walks
// this array front to back, so the order here is the search order.
const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = {
    DIGRAPH_TYPE_GERMAN_UMLAUT,
    DIGRAPH_TYPE_FRENCH_LIGATURES
};
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
const int dictFlags, const int compositeGlyphCodePoint) {
const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
return true;
}
return false;
}
// Maps dictionary flags to the digraph type the dictionary requires.
// The German umlaut flag is tested first, so it wins if both flags are set.
// Returns DIGRAPH_TYPE_NONE when neither flag is present.
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
        const int dictFlags) {
    DigraphType result = DIGRAPH_TYPE_NONE;
    if ((dictFlags & BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING) != 0) {
        result = DIGRAPH_TYPE_GERMAN_UMLAUT;
    } else if ((dictFlags & BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING) != 0) {
        result = DIGRAPH_TYPE_FRENCH_LIGATURES;
    }
    return result;
}
// Fetches the digraph table that applies to a dictionary with the given flags.
// dictFlags: the dictionary flags; they are first translated to a digraph type.
// digraphs: out parameter that receives the table when one exists.
// Returns the number of entries in the table, or 0 if the dictionary uses no digraphs.
/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
        const int dictFlags, const DigraphUtils::digraph_t **const digraphs) {
    return getAllDigraphsForDigraphTypeAndReturnSize(
            getDigraphTypeForDictionary(dictFlags), digraphs);
}
// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
// (which specifies the first or second codepoint in the digraph).
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
const DigraphCodePointIndex digraphCodePointIndex) {
if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
return NOT_A_CODE_POINT;
}
const DigraphUtils::digraph_t *const digraph =
DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
if (!digraph) {
return NOT_A_CODE_POINT;
}
if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
return digraph->first;
} else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
return digraph->second;
}
ASSERT(false);
return NOT_A_CODE_POINT;
}
// Fetches the digraph table for a digraph type.
// digraphType: the type whose table is wanted.
// digraphs: out parameter pointed at the table on success; it is left untouched when
//         the type has no table.
// Returns the number of entries in the table, or 0 if none exist.
/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
        const DigraphUtils::DigraphType digraphType,
        const DigraphUtils::digraph_t **const digraphs) {
    switch (digraphType) {
        case DIGRAPH_TYPE_GERMAN_UMLAUT:
            *digraphs = GERMAN_UMLAUT_DIGRAPHS;
            return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
        case DIGRAPH_TYPE_FRENCH_LIGATURES:
            *digraphs = FRENCH_LIGATURES_DIGRAPHS;
            return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
        default:
            return 0;
    }
}
/**
 * Searches every digraph type listed in USED_DIGRAPH_TYPES, in array order, for the given
 * composite glyph and returns the first match.
 * compositeGlyphCodePoint: the composite glyph code point to look up.
 * Returns a pointer to the matching digraph entry, or 0 if no type has one.
 */
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
        const int compositeGlyphCodePoint) {
    const DigraphType *const typesEnd = USED_DIGRAPH_TYPES + NELEMS(USED_DIGRAPH_TYPES);
    for (const DigraphType *type = USED_DIGRAPH_TYPES; type != typesEnd; ++type) {
        const digraph_t *const found =
                getDigraphForDigraphTypeAndCodePoint(*type, compositeGlyphCodePoint);
        if (found != 0) {
            return found;
        }
    }
    return 0;
}
/**
 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
 * The code point is passed through toLowerCase() before matching, so an upper-case
 * composite glyph resolves to the entry of its lower-case form.
 * digraphType: the type of digraphs supported.
 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
 */
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
        const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
    const DigraphUtils::digraph_t *digraphs = 0;
    const int compositeGlyphLowerCodePoint = toLowerCase(compositeGlyphCodePoint);
    // Select the table by digraph type. The ...ForDictionary... variant takes dictionary
    // flags; handing it a DigraphType would silently convert the enum value to int and
    // reinterpret it as a flag bitmask, selecting the wrong table (or none at all).
    const int digraphsSize =
            DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
    for (int i = 0; i < digraphsSize; i++) {
        if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) {
            return &digraphs[i];
        }
    }
    return 0;
}
} // namespace latinime