Merge "Add initial reordering rules for Myanmar"

This commit is contained in:
Jean Chalard 2014-05-12 11:29:53 +00:00 committed by Android (Google) Code Review
commit 61ddac28de
3 changed files with 434 additions and 4 deletions

View file

@ -16,23 +16,220 @@
package com.android.inputmethod.event;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.utils.CollectionUtils;
import java.util.ArrayList;
import java.util.Arrays;
/**
* A combiner that reorders input for Myanmar.
*/
public class MyanmarReordering implements Combiner {
// U+1031 MYANMAR VOWEL SIGN E
private final static int VOWEL_E = 0x1031; // Code point for vowel E that we need to reorder
// U+200C ZERO WIDTH NON-JOINER
// U+200B ZERO WIDTH SPACE
private final static int ZERO_WIDTH_NON_JOINER = 0x200B; // should be 0x200C
private final ArrayList<Event> mCurrentEvents = CollectionUtils.newArrayList();
// List of consonants :
// U+1000 MYANMAR LETTER KA
// U+1001 MYANMAR LETTER KHA
// U+1002 MYANMAR LETTER GA
// U+1003 MYANMAR LETTER GHA
// U+1004 MYANMAR LETTER NGA
// U+1005 MYANMAR LETTER CA
// U+1006 MYANMAR LETTER CHA
// U+1007 MYANMAR LETTER JA
// U+1008 MYANMAR LETTER JHA
// U+1009 MYANMAR LETTER NYA
// U+100A MYANMAR LETTER NNYA
// U+100B MYANMAR LETTER TTA
// U+100C MYANMAR LETTER TTHA
// U+100D MYANMAR LETTER DDA
// U+100E MYANMAR LETTER DDHA
// U+100F MYANMAR LETTER NNA
// U+1010 MYANMAR LETTER TA
// U+1011 MYANMAR LETTER THA
// U+1012 MYANMAR LETTER DA
// U+1013 MYANMAR LETTER DHA
// U+1014 MYANMAR LETTER NA
// U+1015 MYANMAR LETTER PA
// U+1016 MYANMAR LETTER PHA
// U+1017 MYANMAR LETTER BA
// U+1018 MYANMAR LETTER BHA
// U+1019 MYANMAR LETTER MA
// U+101A MYANMAR LETTER YA
// U+101B MYANMAR LETTER RA
// U+101C MYANMAR LETTER LA
// U+101D MYANMAR LETTER WA
// U+101E MYANMAR LETTER SA
// U+101F MYANMAR LETTER HA
// U+1020 MYANMAR LETTER LLA
// U+103F MYANMAR LETTER GREAT SA
private static boolean isConsonant(final int codePoint) {
return (codePoint >= 0x1000 && codePoint <= 0x1020) || 0x103F == codePoint;
}
// List of medials :
// U+103B MYANMAR CONSONANT SIGN MEDIAL YA
// U+103C MYANMAR CONSONANT SIGN MEDIAL RA
// U+103D MYANMAR CONSONANT SIGN MEDIAL WA
// U+103E MYANMAR CONSONANT SIGN MEDIAL HA
// U+105E MYANMAR CONSONANT SIGN MON MEDIAL NA
// U+105F MYANMAR CONSONANT SIGN MON MEDIAL MA
// U+1060 MYANMAR CONSONANT SIGN MON MEDIAL LA
// U+1082 MYANMAR CONSONANT SIGN SHAN MEDIAL WA
private static int[] MEDIAL_LIST = { 0x103B, 0x103C, 0x103D, 0x103E,
0x105E, 0x105F, 0x1060, 0x1082};
private static boolean isMedial(final int codePoint) {
return Arrays.binarySearch(MEDIAL_LIST, codePoint) >= 0;
}
private static boolean isConsonantOrMedial(final int codePoint) {
return isConsonant(codePoint) || isMedial(codePoint);
}
private Event getLastEvent() {
final int size = mCurrentEvents.size();
if (size <= 0) {
return null;
}
return mCurrentEvents.get(size - 1);
}
private CharSequence getCharSequence() {
final StringBuilder s = new StringBuilder();
for (final Event e : mCurrentEvents) {
s.appendCodePoint(e.mCodePoint);
}
return s;
}
/**
* Clears the currently combining stream of events and returns the resulting software text
* event corresponding to the stream. Optionally adds a new event to the cleared stream.
* @param newEvent the new event to add to the stream. null if none.
* @return the resulting software text event. Null if none.
*/
private Event clearAndGetResultingEvent(final Event newEvent) {
final CharSequence combinedText;
if (mCurrentEvents.size() > 0) {
combinedText = getCharSequence();
mCurrentEvents.clear();
} else {
combinedText = null;
}
if (null != newEvent) {
mCurrentEvents.add(newEvent);
}
return null == combinedText ? null
: Event.createSoftwareTextEvent(combinedText, Event.NOT_A_KEY_CODE);
}
@Override
public Event processEvent(ArrayList<Event> previousEvents, Event event) {
return event;
public Event processEvent(ArrayList<Event> previousEvents, Event newEvent) {
final int codePoint = newEvent.mCodePoint;
if (VOWEL_E == codePoint) {
final Event lastEvent = getLastEvent();
if (null == lastEvent) {
mCurrentEvents.add(newEvent);
return null;
} else if (isConsonantOrMedial(lastEvent.mCodePoint)) {
final Event resultingEvent = clearAndGetResultingEvent(null);
mCurrentEvents.add(Event.createSoftwareKeypressEvent(ZERO_WIDTH_NON_JOINER,
Event.NOT_A_KEY_CODE,
Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE,
false /* isKeyRepeat */));
mCurrentEvents.add(newEvent);
return resultingEvent;
} else { // VOWEL_E == lastCodePoint. But if that was anything else this is correct too.
return clearAndGetResultingEvent(newEvent);
}
} if (isConsonant(codePoint)) {
final Event lastEvent = getLastEvent();
if (null == lastEvent) {
mCurrentEvents.add(newEvent);
return null;
} else if (VOWEL_E == lastEvent.mCodePoint) {
final int eventSize = mCurrentEvents.size();
if (eventSize >= 2
&& mCurrentEvents.get(eventSize - 2).mCodePoint == ZERO_WIDTH_NON_JOINER) {
// We have a ZWJN before a vowel E. We need to remove the ZWNJ and then
// reorder the vowel with respect to the consonant.
mCurrentEvents.remove(eventSize - 1);
mCurrentEvents.remove(eventSize - 2);
mCurrentEvents.add(newEvent);
mCurrentEvents.add(lastEvent);
return null;
}
// If there is already a consonant, then we are starting a new syllable.
for (int i = eventSize - 2; i >= 0; --i) {
if (isConsonant(mCurrentEvents.get(i).mCodePoint)) {
return clearAndGetResultingEvent(newEvent);
}
}
// If we come here, we didn't have a consonant so we reorder
mCurrentEvents.remove(eventSize - 1);
mCurrentEvents.add(newEvent);
mCurrentEvents.add(lastEvent);
return null;
} else { // lastCodePoint is a consonant/medial. But if it's something else it's fine
return clearAndGetResultingEvent(newEvent);
}
} else if (isMedial(codePoint)) {
final Event lastEvent = getLastEvent();
if (null == lastEvent) {
mCurrentEvents.add(newEvent);
return null;
} else if (VOWEL_E == lastEvent.mCodePoint) {
final int eventSize = mCurrentEvents.size();
// If there is already a consonant, then we are in the middle of a syllable, and we
// need to reorder.
boolean hasConsonant = false;
for (int i = eventSize - 2; i >= 0; --i) {
if (isConsonant(mCurrentEvents.get(i).mCodePoint)) {
hasConsonant = true;
break;
}
}
if (hasConsonant) {
mCurrentEvents.remove(eventSize - 1);
mCurrentEvents.add(newEvent);
mCurrentEvents.add(lastEvent);
return null;
}
// Otherwise, we just commit everything.
return clearAndGetResultingEvent(null);
} else { // lastCodePoint is a consonant/medial. But if it's something else it's fine
return clearAndGetResultingEvent(newEvent);
}
} else if (Constants.CODE_DELETE == newEvent.mKeyCode) {
if (mCurrentEvents.size() > 0) {
mCurrentEvents.remove(mCurrentEvents.size() - 1);
return null;
}
}
// This character is not part of the combining scheme, so we should reset everything.
if (mCurrentEvents.size() > 0) {
// If we have events in flight, then add the new event and return the resulting event.
mCurrentEvents.add(newEvent);
return clearAndGetResultingEvent(null);
} else {
// If we don't have any events in flight, then just pass this one through.
return newEvent;
}
}
@Override
public CharSequence getCombiningStateFeedback() {
return "";
return getCharSequence();
}
@Override
public void reset() {
mCurrentEvents.clear();
}
}

View file

@ -205,7 +205,8 @@ public final class SettingsValues {
}
/**
 * Tells whether a code point can be part of a word.
 *
 * A code point is part of a word if it is a letter, a word connector, or a combining spacing
 * mark (Unicode general category Mc). The latter are not letters according to
 * {@link Character#isLetter(int)}, yet they belong inside words: U+1031 MYANMAR VOWEL SIGN E is
 * one such mark.
 *
 * @param code the code point to test.
 * @return true if the code point can be part of a word, false otherwise.
 */
public boolean isWordCodePoint(final int code) {
    return Character.isLetter(code) || isWordConnector(code)
            || Character.COMBINING_SPACING_MARK == Character.getType(code);
}
public boolean isUsuallyPrecededBySpace(final int code) {

View file

@ -0,0 +1,232 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Pair;
/*
* Relevant characters for this test :
* Spurs the need to reorder :
* U+1031 MYANMAR VOWEL SIGN E :
* U+1004 U+103A U+1039 Kinzi. It's a compound character.
*
* List of consonants :
* U+1000 MYANMAR LETTER KA က
* U+1001 MYANMAR LETTER KHA
* U+1002 MYANMAR LETTER GA
* U+1003 MYANMAR LETTER GHA
* U+1004 MYANMAR LETTER NGA
* U+1005 MYANMAR LETTER CA
* U+1006 MYANMAR LETTER CHA
* U+1007 MYANMAR LETTER JA
* U+1008 MYANMAR LETTER JHA
* U+1009 MYANMAR LETTER NYA
* U+100A MYANMAR LETTER NNYA
* U+100B MYANMAR LETTER TTA
* U+100C MYANMAR LETTER TTHA
* U+100D MYANMAR LETTER DDA
* U+100E MYANMAR LETTER DDHA
* U+100F MYANMAR LETTER NNA
* U+1010 MYANMAR LETTER TA
* U+1011 MYANMAR LETTER THA
* U+1012 MYANMAR LETTER DA
* U+1013 MYANMAR LETTER DHA
* U+1014 MYANMAR LETTER NA
* U+1015 MYANMAR LETTER PA
* U+1016 MYANMAR LETTER PHA
* U+1017 MYANMAR LETTER BA
* U+1018 MYANMAR LETTER BHA
* U+1019 MYANMAR LETTER MA
* U+101A MYANMAR LETTER YA
* U+101B MYANMAR LETTER RA
* U+101C MYANMAR LETTER LA
* U+101D MYANMAR LETTER WA
* U+101E MYANMAR LETTER SA
* U+101F MYANMAR LETTER HA
* U+1020 MYANMAR LETTER LLA
* U+103F MYANMAR LETTER GREAT SA
*
* List of medials :
* U+103B MYANMAR CONSONANT SIGN MEDIAL YA
* U+103C MYANMAR CONSONANT SIGN MEDIAL RA
* U+103D MYANMAR CONSONANT SIGN MEDIAL WA
* U+103E MYANMAR CONSONANT SIGN MEDIAL HA
* U+105E MYANMAR CONSONANT SIGN MON MEDIAL NA
* U+105F MYANMAR CONSONANT SIGN MON MEDIAL MA
* U+1060 MYANMAR CONSONANT SIGN MON MEDIAL LA
* U+1082 MYANMAR CONSONANT SIGN SHAN MEDIAL WA
*
* Other relevant characters :
* U+200C ZERO WIDTH NON-JOINER
* U+200B ZERO WIDTH SPACE
*/
@LargeTest
public class InputLogicTestsReorderingMyanmar extends InputTestsBase {
    // The tests are formatted as follows.
    // Each test is an entry in the array of Pair arrays.
    // One test is an array of pairs. Each pair contains, in the `first' member,
    // the code points that the next key press should contain. In the `second'
    // member is stored the string that should be in the text view after this
    // key press.
    private static final Pair[][] TESTS = {

        // Tests for U+1031 MYANMAR VOWEL SIGN E :
        new Pair[] { // Type : U+1031 U+1000 U+101F
            Pair.create(new int[] { 0x1031 }, "\u1031"),
            Pair.create(new int[] { 0x1000 }, "\u1000\u1031"),
            Pair.create(new int[] { 0x101F }, "\u1000\u1031\u101F")
        },

        new Pair[] { // Type : U+1000 U+1031 U+101F
            Pair.create(new int[] { 0x1000 }, "\u1000"),
            // The U+200B below is the placeholder the combiner puts in front of the vowel.
            Pair.create(new int[] { 0x1031 }, "\u1000\u200B\u1031"),
            Pair.create(new int[] { 0x101F }, "\u1000\u101F\u1031")
        },

        new Pair[] { // Type : U+1031 U+101D U+103E U+1018
            Pair.create(new int[] { 0x1031 }, "\u1031"),
            Pair.create(new int[] { 0x101D }, "\u101D\u1031"),
            Pair.create(new int[] { 0x103E }, "\u101D\u103E\u1031"),
            Pair.create(new int[] { 0x1018 }, "\u101D\u103E\u1031\u1018")
        },

        new Pair[] { // Type : U+1031 U+1014 U+1031 U+1000 U+102C U+1004 U+103A U+1038 U+101C
                     // U+102C U+1038 U+104B
            Pair.create(new int[] { 0x1031 }, "\u1031"),
            Pair.create(new int[] { 0x1014 }, "\u1014\u1031"),
            Pair.create(new int[] { 0x1031 }, "\u1014\u1031\u1031"),
            Pair.create(new int[] { 0x1000 }, "\u1014\u1031\u1000\u1031"),
            Pair.create(new int[] { 0x102C }, "\u1014\u1031\u1000\u1031\u102C"),
            Pair.create(new int[] { 0x1004 }, "\u1014\u1031\u1000\u1031\u102C\u1004"),
            Pair.create(new int[] { 0x103A },
                    "\u1014\u1031\u1000\u1031\u102C\u1004\u103A"),
            Pair.create(new int[] { 0x1038 },
                    "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038"),
            Pair.create(new int[] { 0x101C },
                    "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C"),
            Pair.create(new int[] { 0x102C },
                    "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C\u102C"),
            Pair.create(new int[] { 0x1038 },
                    "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C\u102C\u1038"),
            Pair.create(new int[] { 0x104B },
                    "\u1014\u1031\u1000\u1031\u102C\u1004\u103A\u1038\u101C\u102C\u1038\u104B")
        },

        new Pair[] { // Type : U+1031 U+1031 U+1031 U+1000
            Pair.create(new int[] { 0x1031 }, "\u1031"),
            Pair.create(new int[] { 0x1031 }, "\u1031\u1031"),
            Pair.create(new int[] { 0x1031 }, "\u1031\u1031\u1031"),
            Pair.create(new int[] { 0x1000 }, "\u1031\u1031\u1000\u1031")
        },

        new Pair[] { // Type : U+1031 U+1001 U+103B U+103D U+1038
            Pair.create(new int[] { 0x1031 }, "\u1031"),
            Pair.create(new int[] { 0x1001 }, "\u1001\u1031"),
            Pair.create(new int[] { 0x103B }, "\u1001\u103B\u1031"),
            Pair.create(new int[] { 0x103D }, "\u1001\u103B\u103D\u1031"),
            Pair.create(new int[] { 0x1038 }, "\u1001\u103B\u103D\u1031\u1038")
        },

        // Tests for Kinzi U+1004 U+103A U+1039 :

        /* Kinzi reordering is not implemented yet. Uncomment these tests when it is.

        new Pair[] { // Type : U+1021 U+1002 (U+1004 U+103A U+1039)
                     // U+101C U+1014 U+103A
            Pair.create(new int[] { 0x1021 }, "\u1021"),
            Pair.create(new int[] { 0x1002 }, "\u1021\u1002"),
            Pair.create(new int[] { 0x1004, 0x103A, 0x1039 },
                    "\u1021\u1004\u103A\u1039\u1002"),
            Pair.create(new int[] { 0x101C },
                    "\u1021\u1004\u103A\u1039\u1002\u101C"),
            Pair.create(new int[] { 0x1014 },
                    "\u1021\u1004\u103A\u1039\u1002\u101C\u1014"),
            Pair.create(new int[] { 0x103A },
                    "\u1021\u1004\u103A\u1039\u1002\u101C\u1014\u103A")
        },

        new Pair[] { //Type : kinzi after a whole syllable U+101E U+1001 U+103B U+102D U+102F
                     // (U+1004 U+103A U+1039) U+1004 U+103A U+1038
            Pair.create(new int[] { 0x101E }, "\u101E"),
            Pair.create(new int[] { 0x1001 }, "\u101E\u1001"),
            Pair.create(new int[] { 0x103B }, "\u101E\u1001\u103B"),
            Pair.create(new int[] { 0x102D }, "\u101E\u1001\u103B\u102D"),
            Pair.create(new int[] { 0x102F }, "\u101E\u1001\u103B\u102D\u102F"),
            Pair.create(new int[] { 0x1004, 0x103A, 0x1039},
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F"),
            Pair.create(new int[] { 0x1004 },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004"),
            Pair.create(new int[] { 0x103A },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A"),
            Pair.create(new int[] { 0x1038 },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A\u1038")
        },

        new Pair[] { // Type : kinzi after the consonant U+101E U+1001 (U+1004 U+103A U+1039)
                     // U+103B U+102D U+102F U+1004 U+103A U+1038
            Pair.create(new int[] { 0x101E }, "\u101E"),
            Pair.create(new int[] { 0x1001 }, "\u101E\u1001"),
            Pair.create(new int[] { 0x1004, 0x103A, 0x1039 },
                    "\u101E\u1004\u103A\u1039\u1001"),
            Pair.create(new int[] { 0x103B },
                    "\u101E\u1004\u103A\u1039\u1001\u103B"),
            Pair.create(new int[] { 0x102D },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D"),
            Pair.create(new int[] { 0x102F },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F"),
            Pair.create(new int[] { 0x1004 },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004"),
            Pair.create(new int[] { 0x103A },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A"),
            Pair.create(new int[] { 0x1038 },
                    "\u101E\u1004\u103A\u1039\u1001\u103B\u102D\u102F\u1004\u103A\u1038")
        },
        */
    };

    /**
     * Runs one test : plays each step in order and checks the contents of the text view
     * after each of them.
     * @param testNumber the 1-based number of the test, used in the failure message.
     * @param test the steps of the test, as described above TESTS.
     */
    private void doMyanmarTest(final int testNumber, final Pair[] test) {
        int stepNumber = 0;
        for (final Pair<int[], String> step : test) {
            ++stepNumber;
            final int[] input = step.first;
            final String expectedResult = step.second;
            if (input.length > 1) {
                // Several code points in a single key press are sent as text input.
                mLatinIME.onTextInput(new String(input, 0, input.length));
            } else {
                type(input[0]);
            }
            assertEquals("Myanmar reordering test " + testNumber + ", step " + stepNumber,
                    expectedResult, mEditText.getText().toString());
        }
    }

    public void testMyanmarReordering() {
        int testNumber = 0;
        // Burmese as used in Myanmar : ISO 639-1 language code "my", ISO 3166-1 region "MM".
        changeLanguage("my_MM");
        for (final Pair[] test : TESTS) {
            // Small trick to reset LatinIME : setText("") and send updateSelection with values
            // LatinIME has never seen, and cursor pos 0,0.
            mEditText.setText("");
            mLatinIME.onUpdateSelection(1, 1, 0, 0, -1, -1);
            doMyanmarTest(++testNumber, test);
        }
    }
}