diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 7b37777f5..5e36d9703 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -247,7 +247,9 @@ public final class BinaryDictionary extends Dictionary { final String prevWord, final ProximityInfo proximityInfo, final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, final int sessionId, final float[] inOutLanguageWeight) { - if (!isValidDictionary()) return null; + if (!isValidDictionary()) { + return null; + } Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE); // TODO: toLowerCase in the native code @@ -257,12 +259,11 @@ public final class BinaryDictionary extends Dictionary { final boolean isGesture = composer.isBatchMode(); final int inputSize; if (!isGesture) { - final int composerSize = composer.sizeWithoutTrailingSingleQuotes(); - if (composerSize > MAX_WORD_LENGTH - 1) return null; - for (int i = 0; i < composerSize; i++) { - mInputCodePoints[i] = composer.getCodeAt(i); + inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( + mInputCodePoints, MAX_WORD_LENGTH); + if (inputSize < 0) { + return null; } - inputSize = composerSize; } else { inputSize = inputPointers.getPointerSize(); } diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index 81d642ff2..02f18cdd3 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -131,29 +131,42 @@ public final class WordComposer { return mCodePointSize; } + /** + * Copy the code points in the typed word to a destination array of ints. + * + * If the array is too small to hold the code points in the typed word, nothing is copied and + * -1 is returned. + * + * @param destination the array of ints. + * @param maxSize the size of the array. + * @return the number of copied code points. + */ + public int copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( + final int[] destination, final int maxSize) { + int i = mTypedWordCache.length() - 1; + while (i >= 0 && mTypedWordCache.charAt(i) == Constants.CODE_SINGLE_QUOTE) { + --i; + } + if (i < 0) { + // The string is empty or contains only single quotes. + return 0; + } + final int codePointSize = Character.codePointCount(mTypedWordCache, 0, i); + if (codePointSize > maxSize) { + return -1; + } + return StringUtils.copyCodePointsAndReturnCodePointCount(destination, mTypedWordCache, 0, + i + 1, true /* downCase */); + } + public boolean isSingleLetter() { return size() == 1; } - // When the composition contains trailing quotes, we don't pass them to the suggestion engine. - // This is because "'tgis'" should be corrected to "'this'", but we can't afford to consider - // single quotes as separators because of their very common use as apostrophes. - public int sizeWithoutTrailingSingleQuotes() { - return size() - mTrailingSingleQuotesCount; - } - public final boolean isComposingWord() { return size() > 0; } - // TODO: make sure that the index should not exceed MAX_WORD_LENGTH - public int getCodeAt(int index) { - if (index >= MAX_WORD_LENGTH) { - return -1; - } - return mPrimaryKeyCodes[index]; - } - public InputPointers getInputPointers() { return mInputPointers; } diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java index accbc8b7b..374badc19 100644 --- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java @@ -191,13 +191,42 @@ public final class StringUtils { } final int[] codePoints = new int[Character.codePointCount(charSequence, startIndex, endIndex)]; + copyCodePointsAndReturnCodePointCount(codePoints, charSequence, startIndex, endIndex, + false /* downCase */); + return codePoints; + } + + /** + * Copies the codepoints in a CharSequence to an int array. + * + * This method assumes there is enough space in the array to store the code points. The size + * can be measured with Character#codePointCount(CharSequence, int, int) before passing to this + * method. If the int array is too small, an ArrayIndexOutOfBoundsException will be thrown. + * Also, this method makes no effort to be thread-safe. Do not modify the CharSequence while + * this method is running, or the behavior is undefined. + * This method can optionally downcase code points before copying them, but it pays no attention + * to locale while doing so. + * + * @param destination the int array. + * @param charSequence the CharSequence. + * @param startIndex the start index inside the string in java chars, inclusive. + * @param endIndex the end index inside the string in java chars, exclusive. + * @param downCase if this is true, code points will be downcased before being copied. + * @return the number of copied code points. + */ + public static int copyCodePointsAndReturnCodePointCount(final int[] destination, + final CharSequence charSequence, final int startIndex, final int endIndex, + final boolean downCase) { int destIndex = 0; for (int index = startIndex; index < endIndex; index = Character.offsetByCodePoints(charSequence, index, 1)) { - codePoints[destIndex] = Character.codePointAt(charSequence, index); + final int codePoint = Character.codePointAt(charSequence, index); + // TODO: stop using this, as it's not aware of the locale and does not always do + // the right thing. + destination[destIndex] = downCase ? Character.toLowerCase(codePoint) : codePoint; destIndex++; } - return codePoints; + return destIndex; } public static int[] toSortedCodePointArray(final String string) { diff --git a/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java index e55c32bd0..2a4ead383 100644 --- a/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java @@ -308,4 +308,68 @@ public class StringAndJsonUtilsTests extends AndroidTestCase { assertEquals(objs[i], newObjArray.get(i)); } } + + public void testToCodePointArray() { + final String STR_WITH_SUPPLEMENTARY_CHAR = "abcde\uD861\uDED7fgh\u0000\u2002\u2003\u3000xx"; + final int[] EXPECTED_RESULT = new int[] { 'a', 'b', 'c', 'd', 'e', 0x286D7, 'f', 'g', 'h', + 0, 0x2002, 0x2003, 0x3000, 'x', 'x'}; + final int[] codePointArray = StringUtils.toCodePointArray(STR_WITH_SUPPLEMENTARY_CHAR, 0, + STR_WITH_SUPPLEMENTARY_CHAR.length()); + assertEquals("toCodePointArray, size matches", codePointArray.length, + EXPECTED_RESULT.length); + for (int i = 0; i < EXPECTED_RESULT.length; ++i) { + assertEquals("toCodePointArray position " + i, codePointArray[i], EXPECTED_RESULT[i]); + } + } + + public void testCopyCodePointsAndReturnCodePointCount() { + final String STR_WITH_SUPPLEMENTARY_CHAR = "AbcDE\uD861\uDED7fGh\u0000\u2002\u3000あx"; + final int[] EXPECTED_RESULT = new int[] { 'A', 'b', 'c', 'D', 'E', 0x286D7, + 'f', 'G', 'h', 0, 0x2002, 0x3000, 'あ', 'x'}; + final int[] EXPECTED_RESULT_DOWNCASE = new int[] { 'a', 'b', 'c', 'd', 'e', 0x286D7, + 'f', 'g', 'h', 0, 0x2002, 0x3000, 'あ', 'x'}; + + int[] codePointArray = new int[50]; + int codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray, + STR_WITH_SUPPLEMENTARY_CHAR, 0, + STR_WITH_SUPPLEMENTARY_CHAR.length(), false /* downCase */); + assertEquals("copyCodePointsAndReturnCodePointCount, size matches", codePointCount, + EXPECTED_RESULT.length); + for (int i = 0; i < codePointCount; ++i) { + assertEquals("copyCodePointsAndReturnCodePointCount position " + i, codePointArray[i], + EXPECTED_RESULT[i]); + } + + codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray, + STR_WITH_SUPPLEMENTARY_CHAR, 0, + STR_WITH_SUPPLEMENTARY_CHAR.length(), true /* downCase */); + assertEquals("copyCodePointsAndReturnCodePointCount downcase, size matches", codePointCount, + EXPECTED_RESULT_DOWNCASE.length); + for (int i = 0; i < codePointCount; ++i) { + assertEquals("copyCodePointsAndReturnCodePointCount position " + i, codePointArray[i], + EXPECTED_RESULT_DOWNCASE[i]); + } + + final int JAVA_CHAR_COUNT = 8; + final int CODEPOINT_COUNT = 7; + codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray, + STR_WITH_SUPPLEMENTARY_CHAR, 0, JAVA_CHAR_COUNT, false /* downCase */); + assertEquals("copyCodePointsAndReturnCodePointCount, size matches", codePointCount, + CODEPOINT_COUNT); + for (int i = 0; i < codePointCount; ++i) { + assertEquals("copyCodePointsAndReturnCodePointCount position " + i, codePointArray[i], + EXPECTED_RESULT[i]); + } + + boolean exceptionHappened = false; + codePointArray = new int[5]; + try { + codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray, + STR_WITH_SUPPLEMENTARY_CHAR, 0, JAVA_CHAR_COUNT, false /* downCase */); + } catch (ArrayIndexOutOfBoundsException e) { + exceptionHappened = true; + } + assertTrue("copyCodePointsAndReturnCodePointCount throws when array is too small", + exceptionHappened); + } }