From f3c319fb8ac29448c491af95261a4ce01b64a59c Mon Sep 17 00:00:00 2001 From: Dan Zivkovic Date: Wed, 4 Feb 2015 16:12:15 -0800 Subject: [PATCH] Selections spans should not split surrogate pair. When committing a span after a revert, the offset logic was such that it split a surrogate unicode pair used to express an emoji. Checking the last character of the span lets us avoid this problem. Fix for bug 19255233. Change-Id: I07d18d9002b5075f7925319dd05962011656c311 --- .../inputmethod/latin/common/Constants.java | 3 -- .../latin/common/UnicodeSurrogate.java | 38 +++++++++++++++++++ .../latin/RichInputConnection.java | 25 +++++++++++- .../latin/common/UnicodeSurrogateTests.java | 36 ++++++++++++++++++ 4 files changed, 98 insertions(+), 4 deletions(-) create mode 100644 common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java create mode 100644 tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java diff --git a/common/src/com/android/inputmethod/latin/common/Constants.java b/common/src/com/android/inputmethod/latin/common/Constants.java index a860d3560..a10f866fc 100644 --- a/common/src/com/android/inputmethod/latin/common/Constants.java +++ b/common/src/com/android/inputmethod/latin/common/Constants.java @@ -163,7 +163,6 @@ public final class Constants { // TODO: replace the following constants with state in InputTransaction? public static final int NOT_A_COORDINATE = -1; public static final int SUGGESTION_STRIP_COORDINATE = -2; - public static final int SPELL_CHECKER_COORDINATE = -3; public static final int EXTERNAL_KEYBOARD_COORDINATE = -4; // A hint on how many characters to cache from the TextView. 
A good value of this is given by @@ -214,8 +213,6 @@ public final class Constants { public static final int CODE_DASH = '-'; public static final int CODE_SINGLE_QUOTE = '\''; public static final int CODE_DOUBLE_QUOTE = '"'; - public static final int CODE_QUESTION_MARK = '?'; - public static final int CODE_EXCLAMATION_MARK = '!'; public static final int CODE_SLASH = '/'; public static final int CODE_BACKSLASH = '\\'; public static final int CODE_VERTICAL_BAR = '|'; diff --git a/common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java b/common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java new file mode 100644 index 000000000..10974634d --- /dev/null +++ b/common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.common; + +/** + * Emojis are supplementary characters expressed as a "low"+"high" pair of UTF-16 chars, e.g. + * U+1F625 is "\uD83D\uDE25": '\uD83D' is in [0xd800, 0xdbff], '\uDE25' is in [0xdc00, 0xdfff]. + * NOTE: java.lang.Character names these the other way: its "high" surrogate is [0xd800, 0xdbff]. 
+ * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/lang/Character.html#unicode">Unicode</a> + */ +public final class UnicodeSurrogate { + private static final char LOW_SURROGATE_MIN = '\uD800'; + private static final char LOW_SURROGATE_MAX = '\uDBFF'; + private static final char HIGH_SURROGATE_MIN = '\uDC00'; + private static final char HIGH_SURROGATE_MAX = '\uDFFF'; + + public static boolean isLowSurrogate(final char c) { + return c >= LOW_SURROGATE_MIN && c <= LOW_SURROGATE_MAX; + } + + public static boolean isHighSurrogate(final char c) { + return c >= HIGH_SURROGATE_MIN && c <= HIGH_SURROGATE_MAX; + } +} diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 2cf476f8b..f770e88ef 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -21,6 +21,7 @@ import android.os.Build; import android.os.Bundle; import android.text.SpannableStringBuilder; import android.text.TextUtils; +import android.text.style.CharacterStyle; import android.util.Log; import android.view.KeyEvent; import android.view.inputmethod.CompletionInfo; @@ -32,6 +33,7 @@ import android.view.inputmethod.InputMethodManager; import com.android.inputmethod.compat.InputConnectionCompatUtils; import com.android.inputmethod.latin.common.Constants; +import com.android.inputmethod.latin.common.UnicodeSurrogate; import com.android.inputmethod.latin.common.StringUtils; import com.android.inputmethod.latin.inputlogic.PrivateCommandPerformer; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; @@ -261,7 +263,28 @@ public final class RichInputConnection implements PrivateCommandPerformer { mComposingText.setLength(0); mLastCommittedTextHasBackgroundColor = false; if (null != mIC) { - mIC.commitText(text, newCursorPosition); + mTempObjectForCommitText.clear(); + mTempObjectForCommitText.append(text); + final 
CharacterStyle[] spans = mTempObjectForCommitText.getSpans( + 0, text.length(), CharacterStyle.class); + for (final CharacterStyle span : spans) { + final int spanStart = mTempObjectForCommitText.getSpanStart(span); + final int spanEnd = mTempObjectForCommitText.getSpanEnd(span); + final int spanFlags = mTempObjectForCommitText.getSpanFlags(span); + // We have to adjust the end of the span to include an additional character. + // This is to avoid splitting a unicode surrogate pair. + // See com.android.inputmethod.latin.common.UnicodeSurrogate + // See https://b.corp.google.com/issues/19255233 + if (0 < spanEnd && spanEnd < mTempObjectForCommitText.length()) { + final char spanEndChar = mTempObjectForCommitText.charAt(spanEnd - 1); + final char nextChar = mTempObjectForCommitText.charAt(spanEnd); + if (UnicodeSurrogate.isLowSurrogate(spanEndChar) + && UnicodeSurrogate.isHighSurrogate(nextChar)) { + mTempObjectForCommitText.setSpan(span, spanStart, spanEnd + 1, spanFlags); + } + } + } + mIC.commitText(mTempObjectForCommitText, newCursorPosition); } } diff --git a/tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java b/tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java new file mode 100644 index 000000000..59bb08292 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.common; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.SmallTest; + +@SmallTest +public class UnicodeSurrogateTests extends AndroidTestCase { + + public void testIsLowSurrogate() { + assertFalse(UnicodeSurrogate.isLowSurrogate('\uD7FF')); /* one below 0xD800 */ + assertTrue(UnicodeSurrogate.isLowSurrogate('\uD83D')); /* inside [0xD800, 0xDBFF] */ + assertFalse(UnicodeSurrogate.isLowSurrogate('\uDC00')); /* one above 0xDBFF */ + } + + public void testIsHighSurrogate() { + assertFalse(UnicodeSurrogate.isHighSurrogate('\uDBFF')); /* one below 0xDC00 */ + assertTrue(UnicodeSurrogate.isHighSurrogate('\uDE25')); /* inside [0xDC00, 0xDFFF] */ + assertFalse(UnicodeSurrogate.isHighSurrogate('\uE000')); /* one above 0xDFFF */ + } +}