From 281dbc397317707dfb55e4508815f8e906b66bb4 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Thu, 8 May 2014 16:44:03 +0900 Subject: [PATCH] Add initial reordering rules for Myanmar Bug: 13945569 Change-Id: Ib546ba110e87bf72afa7f42c878cc5f638bff6c7 --- .../inputmethod/event/MyanmarReordering.java | 203 ++++++++++++++- .../latin/settings/SettingsValues.java | 3 +- .../InputLogicTestsReorderingMyanmar.java | 232 ++++++++++++++++++ 3 files changed, 434 insertions(+), 4 deletions(-) create mode 100644 tests/src/com/android/inputmethod/latin/InputLogicTestsReorderingMyanmar.java diff --git a/java/src/com/android/inputmethod/event/MyanmarReordering.java b/java/src/com/android/inputmethod/event/MyanmarReordering.java index 0831b63ec..27b8c14bb 100644 --- a/java/src/com/android/inputmethod/event/MyanmarReordering.java +++ b/java/src/com/android/inputmethod/event/MyanmarReordering.java @@ -16,23 +16,220 @@ package com.android.inputmethod.event; +import com.android.inputmethod.latin.Constants; +import com.android.inputmethod.latin.utils.CollectionUtils; + import java.util.ArrayList; +import java.util.Arrays; /** * A combiner that reorders input for Myanmar. */ public class MyanmarReordering implements Combiner { + // U+1031 MYANMAR VOWEL SIGN E + private final static int VOWEL_E = 0x1031; // Code point for vowel E that we need to reorder + // U+200C ZERO WIDTH NON-JOINER + // U+200B ZERO WIDTH SPACE + private final static int ZERO_WIDTH_NON_JOINER = 0x200B; // should be 0x200C + + private final ArrayList mCurrentEvents = CollectionUtils.newArrayList(); + + // List of consonants : + // U+1000 MYANMAR LETTER KA + // U+1001 MYANMAR LETTER KHA + // U+1002 MYANMAR LETTER GA + // U+1003 MYANMAR LETTER GHA + // U+1004 MYANMAR LETTER NGA + // U+1005 MYANMAR LETTER CA + // U+1006 MYANMAR LETTER CHA + // U+1007 MYANMAR LETTER JA + // U+1008 MYANMAR LETTER JHA + // U+1009 MYANMAR LETTER NYA + // U+100A MYANMAR LETTER NNYA + // U+100B MYANMAR LETTER TTA + // U+100C MYANMAR LETTER TTHA + // U+100D MYANMAR LETTER DDA + // U+100E MYANMAR LETTER DDHA + // U+100F MYANMAR LETTER NNA + // U+1010 MYANMAR LETTER TA + // U+1011 MYANMAR LETTER THA + // U+1012 MYANMAR LETTER DA + // U+1013 MYANMAR LETTER DHA + // U+1014 MYANMAR LETTER NA + // U+1015 MYANMAR LETTER PA + // U+1016 MYANMAR LETTER PHA + // U+1017 MYANMAR LETTER BA + // U+1018 MYANMAR LETTER BHA + // U+1019 MYANMAR LETTER MA + // U+101A MYANMAR LETTER YA + // U+101B MYANMAR LETTER RA + // U+101C MYANMAR LETTER LA + // U+101D MYANMAR LETTER WA + // U+101E MYANMAR LETTER SA + // U+101F MYANMAR LETTER HA + // U+1020 MYANMAR LETTER LLA + // U+103F MYANMAR LETTER GREAT SA + private static boolean isConsonant(final int codePoint) { + return (codePoint >= 0x1000 && codePoint <= 0x1020) || 0x103F == codePoint; + } + + // List of medials : + // U+103B MYANMAR CONSONANT SIGN MEDIAL YA + // U+103C MYANMAR CONSONANT SIGN MEDIAL RA + // U+103D MYANMAR CONSONANT SIGN MEDIAL WA + // U+103E MYANMAR CONSONANT SIGN MEDIAL HA + // U+105E MYANMAR CONSONANT SIGN MON MEDIAL NA + // U+105F MYANMAR CONSONANT SIGN MON MEDIAL MA + // U+1060 MYANMAR CONSONANT SIGN MON MEDIAL LA + // U+1082 MYANMAR CONSONANT SIGN SHAN MEDIAL WA + private static int[] MEDIAL_LIST = { 0x103B, 0x103C, 0x103D, 0x103E, + 0x105E, 0x105F, 0x1060, 0x1082}; + private static boolean isMedial(final int codePoint) { + return Arrays.binarySearch(MEDIAL_LIST, codePoint) >= 0; + } + + private static boolean isConsonantOrMedial(final int codePoint) { + return isConsonant(codePoint) || isMedial(codePoint); + } + + private Event getLastEvent() { + final int size = mCurrentEvents.size(); + if (size <= 0) { + return null; + } + return mCurrentEvents.get(size - 1); + } + + private CharSequence getCharSequence() { + final StringBuilder s = new StringBuilder(); + for (final Event e : mCurrentEvents) { + s.appendCodePoint(e.mCodePoint); + } + return s; + } + + /** + * Clears the currently combining stream of events and returns the resulting software text + * event corresponding to the stream. Optionally adds a new event to the cleared stream. + * @param newEvent the new event to add to the stream. null if none. + * @return the resulting software text event. Null if none. + */ + private Event clearAndGetResultingEvent(final Event newEvent) { + final CharSequence combinedText; + if (mCurrentEvents.size() > 0) { + combinedText = getCharSequence(); + mCurrentEvents.clear(); + } else { + combinedText = null; + } + if (null != newEvent) { + mCurrentEvents.add(newEvent); + } + return null == combinedText ? null + : Event.createSoftwareTextEvent(combinedText, Event.NOT_A_KEY_CODE); + } + @Override - public Event processEvent(ArrayList previousEvents, Event event) { - return event; + public Event processEvent(ArrayList previousEvents, Event newEvent) { + final int codePoint = newEvent.mCodePoint; + if (VOWEL_E == codePoint) { + final Event lastEvent = getLastEvent(); + if (null == lastEvent) { + mCurrentEvents.add(newEvent); + return null; + } else if (isConsonantOrMedial(lastEvent.mCodePoint)) { + final Event resultingEvent = clearAndGetResultingEvent(null); + mCurrentEvents.add(Event.createSoftwareKeypressEvent(ZERO_WIDTH_NON_JOINER, + Event.NOT_A_KEY_CODE, + Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE, + false /* isKeyRepeat */)); + mCurrentEvents.add(newEvent); + return resultingEvent; + } else { // VOWEL_E == lastCodePoint. But if that was anything else this is correct too. + return clearAndGetResultingEvent(newEvent); + } + } if (isConsonant(codePoint)) { + final Event lastEvent = getLastEvent(); + if (null == lastEvent) { + mCurrentEvents.add(newEvent); + return null; + } else if (VOWEL_E == lastEvent.mCodePoint) { + final int eventSize = mCurrentEvents.size(); + if (eventSize >= 2 + && mCurrentEvents.get(eventSize - 2).mCodePoint == ZERO_WIDTH_NON_JOINER) { + // We have a ZWJN before a vowel E. We need to remove the ZWNJ and then + // reorder the vowel with respect to the consonant. + mCurrentEvents.remove(eventSize - 1); + mCurrentEvents.remove(eventSize - 2); + mCurrentEvents.add(newEvent); + mCurrentEvents.add(lastEvent); + return null; + } + // If there is already a consonant, then we are starting a new syllable. + for (int i = eventSize - 2; i >= 0; --i) { + if (isConsonant(mCurrentEvents.get(i).mCodePoint)) { + return clearAndGetResultingEvent(newEvent); + } + } + // If we come here, we didn't have a consonant so we reorder + mCurrentEvents.remove(eventSize - 1); + mCurrentEvents.add(newEvent); + mCurrentEvents.add(lastEvent); + return null; + } else { // lastCodePoint is a consonant/medial. But if it's something else it's fine + return clearAndGetResultingEvent(newEvent); + } + } else if (isMedial(codePoint)) { + final Event lastEvent = getLastEvent(); + if (null == lastEvent) { + mCurrentEvents.add(newEvent); + return null; + } else if (VOWEL_E == lastEvent.mCodePoint) { + final int eventSize = mCurrentEvents.size(); + // If there is already a consonant, then we are in the middle of a syllable, and we + // need to reorder. + boolean hasConsonant = false; + for (int i = eventSize - 2; i >= 0; --i) { + if (isConsonant(mCurrentEvents.get(i).mCodePoint)) { + hasConsonant = true; + break; + } + } + if (hasConsonant) { + mCurrentEvents.remove(eventSize - 1); + mCurrentEvents.add(newEvent); + mCurrentEvents.add(lastEvent); + return null; + } + // Otherwise, we just commit everything. + return clearAndGetResultingEvent(null); + } else { // lastCodePoint is a consonant/medial. But if it's something else it's fine + return clearAndGetResultingEvent(newEvent); + } + } else if (Constants.CODE_DELETE == newEvent.mKeyCode) { + if (mCurrentEvents.size() > 0) { + mCurrentEvents.remove(mCurrentEvents.size() - 1); + return null; + } + } + // This character is not part of the combining scheme, so we should reset everything. + if (mCurrentEvents.size() > 0) { + // If we have events in flight, then add the new event and return the resulting event. + mCurrentEvents.add(newEvent); + return clearAndGetResultingEvent(null); + } else { + // If we don't have any events in flight, then just pass this one through. + return newEvent; + } } @Override public CharSequence getCombiningStateFeedback() { - return ""; + return getCharSequence(); } @Override public void reset() { + mCurrentEvents.clear(); } } diff --git a/java/src/com/android/inputmethod/latin/settings/SettingsValues.java b/java/src/com/android/inputmethod/latin/settings/SettingsValues.java index dde50ccaf..de2eb951e 100644 --- a/java/src/com/android/inputmethod/latin/settings/SettingsValues.java +++ b/java/src/com/android/inputmethod/latin/settings/SettingsValues.java @@ -205,7 +205,8 @@ public final class SettingsValues { } public boolean isWordCodePoint(final int code) { - return Character.isLetter(code) || isWordConnector(code); + return Character.isLetter(code) || isWordConnector(code) + || Character.COMBINING_SPACING_MARK == Character.getType(code); } public boolean isUsuallyPrecededBySpace(final int code) { diff --git a/tests/src/com/android/inputmethod/latin/InputLogicTestsReorderingMyanmar.java b/tests/src/com/android/inputmethod/latin/InputLogicTestsReorderingMyanmar.java new file mode 100644 index 000000000..b3f2819cf --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/InputLogicTestsReorderingMyanmar.java @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import android.test.suitebuilder.annotation.LargeTest; +import android.util.Pair; + +/* + * Relevant characters for this test : + * Spurs the need to reorder : + * U+1031 MYANMAR VOWEL SIGN E : ေ + * U+1004 U+103A U+1039 Kinzi. It's a compound character. + * + * List of consonants : + * U+1000 MYANMAR LETTER KA က + * U+1001 MYANMAR LETTER KHA ခ + * U+1002 MYANMAR LETTER GA ဂ + * U+1003 MYANMAR LETTER GHA ဃ + * U+1004 MYANMAR LETTER NGA င + * U+1005 MYANMAR LETTER CA စ + * U+1006 MYANMAR LETTER CHA ဆ + * U+1007 MYANMAR LETTER JA ဇ + * U+1008 MYANMAR LETTER JHA ဈ + * U+1009 MYANMAR LETTER NYA ဉ + * U+100A MYANMAR LETTER NNYA ည + * U+100B MYANMAR LETTER TTA ဋ + * U+100C MYANMAR LETTER TTHA ဌ + * U+100D MYANMAR LETTER DDA ဍ + * U+100E MYANMAR LETTER DDHA ဎ + * U+100F MYANMAR LETTER NNA ဏ + * U+1010 MYANMAR LETTER TA တ + * U+1011 MYANMAR LETTER THA ထ + * U+1012 MYANMAR LETTER DA ဒ + * U+1013 MYANMAR LETTER DHA ဓ + * U+1014 MYANMAR LETTER NA န + * U+1015 MYANMAR LETTER PA ပ + * U+1016 MYANMAR LETTER PHA ဖ + * U+1017 MYANMAR LETTER BA ဗ + * U+1018 MYANMAR LETTER BHA ဘ + * U+1019 MYANMAR LETTER MA မ + * U+101A MYANMAR LETTER YA ယ + * U+101B MYANMAR LETTER RA ရ + * U+101C MYANMAR LETTER LA လ + * U+101D MYANMAR LETTER WA ဝ + * U+101E MYANMAR LETTER SA သ + * U+101F MYANMAR LETTER HA ဟ + * U+1020 MYANMAR LETTER LLA ဠ + * U+103F MYANMAR LETTER GREAT SA ဿ + * + * List of medials : + * U+103B MYANMAR CONSONANT SIGN MEDIAL YA ျ + * U+103C MYANMAR CONSONANT SIGN MEDIAL RA ြ + * U+103D MYANMAR CONSONANT SIGN MEDIAL WA ွ + * U+103E MYANMAR CONSONANT SIGN MEDIAL HA ှ + * U+105E MYANMAR CONSONANT SIGN MON MEDIAL NA ၞ + * U+105F MYANMAR CONSONANT SIGN MON MEDIAL MA ၟ + * U+1060 MYANMAR CONSONANT SIGN MON MEDIAL LA ၠ + * U+1082 MYANMAR CONSONANT SIGN SHAN MEDIAL WA ႂ + * + * Other relevant characters : + * U+200C ZERO WIDTH NON-JOINER + * U+200B ZERO WIDTH SPACE + */ + +@LargeTest +public class InputLogicTestsReorderingMyanmar extends InputTestsBase { + // The tests are formatted as follows. + // Each test is an entry in the array of Pair arrays. + + // One test is an array of pairs. Each pair contains, in the `first' member, + // the code points that the next key press should contain. In the `second' + // member is stored the string that should be in the text view after this + // key press. + + private static final Pair[][] TESTS = { + + // Tests for U+1031 MYANMAR VOWEL SIGN E : ေ + new Pair[] { // Type : U+1031 U+1000 U+101F ေ က ဟ + Pair.create(new int[] { 0x1031 }, "\u1031"), // ေ + Pair.create(new int[] { 0x1000 }, "\u1000\u1031"), // ကေ + Pair.create(new int[] { 0x101F }, "\u1000\u1031\u101F") // ကေဟ + }, + + new Pair[] { // Type : U+1000 U+1031 U+101F က ေ ဟ + Pair.create(new int[] { 0x1000 }, "\u1000"), // က + Pair.create(new int[] { 0x1031 }, "\u1000\u200B\u1031"), // က‌ေ + Pair.create(new int[] { 0x101F }, "\u1000\u101F\u1031") // ကဟေ + }, + + new Pair[] { // Type : U+1031 U+101D U+103E U+1018 ေ ဝ ှ ဘ + Pair.create(new int[] { 0x1031 }, "\u1031"), // ေ + Pair.create(new int[] { 0x101D }, "\u101D\u1031"), // ဝေ + Pair.create(new int[] { 0x103E }, "\u101D\u103E\u1031"), // ဝှေ + Pair.create(new int[] { 0x1018 }, "\u101D\u103E\u1031\u1018") // ဝှေဘ + }, + + new Pair[] { // Type : U+1031 U+1014 U+1031 U+1000 U+102C U+1004 U+103A U+1038 U+101C + // U+102C U+1038 U+104B ေ န ေ က ာ င ် း လ ာ း ။ + Pair.create(new int[] { 0x1031 }, "\u1031"), // ေ + Pair.create(new int[] { 0x1014 }, "\u1014\u1031"), // နေ + Pair.create(new int[] { 0x1031 }, "\u1014\u1031\u1031"), // နေ‌ေ + Pair.create(new int[] { 0x1000 }, "\u1014\u1031\u1000\u1031"), // နေကေ + Pair.create(new int[] { 0x102C }, "\u1014\u1031\u1000\u1031\u102C"), // နေကော + Pair.create(new int[] { 0x1004 }, "\u1014\u1031\u1000\u1031\u102C\u1004"), // နေကောင + Pair.create(new int[] { 0x103A }, // နေကောင် + "\u1014\u1031\u1000\u1031\u102C\u1004\u103A"), + Pair.create(new int[] { 0x1038 }, // နေကောင်း + "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038"), + Pair.create(new int[] { 0x101C }, // နေကောင်းလ + "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C"), + Pair.create(new int[] { 0x102C }, // နေကောင်းလာ + "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C\u102C"), + Pair.create(new int[] { 0x1038 }, // နေကောင်းလား + "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C\u102C\u1038"), + Pair.create(new int[] { 0x104B }, // နေကောင်းလား။ + "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C\u102C\u1038\u104B") + }, + + new Pair[] { // Type : U+1031 U+1031 U+1031 U+1000 ေ ေ ေ က + Pair.create(new int[] { 0x1031 }, "\u1031"), // ေ + Pair.create(new int[] { 0x1031 }, "\u1031\u1031"), // ေေ + Pair.create(new int[] { 0x1031 }, "\u1031\u1031\u1031"), // U+1031ေေေ + Pair.create(new int[] { 0x1000 }, "\u1031\u1031\u1000\u1031") // ေေကေ + }, + + new Pair[] { // Type : U+1031 U+1001 U+103B U+103D U+1038 ေ ခ ျ ွ း + Pair.create(new int[] { 0x1031 }, "\u1031"), // ေ + Pair.create(new int[] { 0x1001 }, "\u1001\u1031"), // ခေ + Pair.create(new int[] { 0x103B }, "\u1001\u103B\u1031"), // ချေ + Pair.create(new int[] { 0x103D }, "\u1001\u103B\u103D\u1031"), // ချွေ + Pair.create(new int[] { 0x1038 }, "\u1001\u103B\u103D\u1031\u1038") // ချွေး + }, + + // Tests for Kinzi U+1004 U+103A U+1039 : + + /* Kinzi reordering is not implemented yet. Uncomment these tests when it is. + + new Pair[] { // Type : U+1021 U+1002 (U+1004 U+103A U+1039) + // U+101C U+1014 U+103A အ ဂ (င ် ္) လ န ် + Pair.create(new int[] { 0x1021 }, "\u1021"), // အ + Pair.create(new int[] { 0x1002 }, "\u1021\u1002"), // အဂ + Pair.create(new int[] { 0x1004, 0x103A, 0x1039 }, // အင်္ဂ + "\u1021\u1004\u103A\u1039\u1002"), + Pair.create(new int[] { 0x101C }, // အင်္ဂလ + "\u1021\u1004\u103A\u1039\u1002\u101C"), + Pair.create(new int[] { 0x1014 }, // အင်္ဂလန + "\u1021\u1004\u103A\u1039\u1002\u101C\u1014"), + Pair.create(new int[] { 0x103A }, // အင်္ဂလန် + "\u1021\u1004\u103A\u1039\u1002\u101C\u1014\u103A") + }, + + new Pair[] { //Type : kinzi after a whole syllable U+101E U+1001 U+103B U+102D U+102F + // (U+1004 U+103A U+1039) U+1004 U+103A U+1038 သ ခ ျ ိ ု င ် ္ င ် း + Pair.create(new int[] { 0x101E }, "\u101E"), // သခ + Pair.create(new int[] { 0x1001 }, "\u101E\u1001"), // သခ + Pair.create(new int[] { 0x103B }, "\u101E\u1001\u103B"), // သချ + Pair.create(new int[] { 0x102D }, "\u101E\u1001\u103B\u102D"), // သချိ + Pair.create(new int[] { 0x102F }, "\u101E\u1001\u103B\u102D\u102F"), // သချို + Pair.create(new int[] { 0x1004, 0x103A, 0x1039}, // သင်္ချို + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F"), + Pair.create(new int[] { 0x1004 }, // သင်္ချိုင + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004"), + Pair.create(new int[] { 0x103A }, // သင်္ချိုင် + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A"), + Pair.create(new int[] { 0x1038 }, // သင်္ချိုင်း + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A\u1038") + }, + + new Pair[] { // Type : kinzi after the consonant U+101E U+1001 (U+1004 U+103A U+1039) + // U+103B U+102D U+102F U+1004 U+103A U+1038 သ ခ င ် ္ ျ ိ ု င ် း + Pair.create(new int[] { 0x101E }, "\u101E"), // သခ + Pair.create(new int[] { 0x1001 }, "\u101E\u1001"), // သခ + Pair.create(new int[] { 0x1004, 0x103A, 0x1039 }, // သင်္ခ + "\u101E\u1004\u103A\u1039\u1001"), + Pair.create(new int[] { 0x103B }, // သင်္ချ + "\u101E\u1004\u103A\u1039\u1001\u103B"), + Pair.create(new int[] { 0x102D }, // သင်္ချိ + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D"), + Pair.create(new int[] { 0x102F }, // သင်္ချို + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F"), + Pair.create(new int[] { 0x1004 }, // သင်္ချိုင + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004"), + Pair.create(new int[] { 0x103A }, // သင်္ချိုင် + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A"), + Pair.create(new int[] { 0x1038 }, // သင်္ချိုင်း + "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A\u1038") + }, + */ + }; + + private void doMyanmarTest(final int testNumber, final Pair[] test) { + int stepNumber = 0; + for (final Pair step : test) { + ++stepNumber; + final int[] input = step.first; + final String expectedResult = step.second; + if (input.length > 1) { + mLatinIME.onTextInput(new String(input, 0, input.length)); + } else { + type(input[0]); + } + assertEquals("Myanmar reordering test " + testNumber + ", step " + stepNumber, + expectedResult, mEditText.getText().toString()); + } + } + + public void testMyanmarReordering() { + int testNumber = 0; + changeLanguage("mm_MY"); + for (final Pair[] test : TESTS) { + // Small trick to reset LatinIME : setText("") and send updateSelection with values + // LatinIME has never seen, and cursor pos 0,0. + mEditText.setText(""); + mLatinIME.onUpdateSelection(1, 1, 0, 0, -1, -1); + doMyanmarTest(++testNumber, test); + } + } +}