From 73ec85b8ad3102ce1c7e6013be73afe83475e589 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 15 Apr 2013 17:33:48 +0900 Subject: [PATCH] Don't insert automatic spaces when text looks like a URL This is about as ad-hoc as it gets, but then again, what we want is probably as ad-hoc as it gets. All URL boxes I know of double as search bars, and not adding automatic spaces there sucks (e.g. in Chrome URL bar). And in other boxes actually you don't want to add a space if it looks like a URL. QSB isn't even a search box, and it behaves like this. So I think this is actually the right answer to the problem. Bug: 7062925 Change-Id: Ib09472b34644fd5bf2dc84bb97cedeeba28bcd02 --- .../android/inputmethod/latin/Constants.java | 2 + .../latin/ContactsBinaryDictionary.java | 2 +- .../inputmethod/latin/InputTypeUtils.java | 1 - .../android/inputmethod/latin/LatinIME.java | 3 +- .../latin/RichInputConnection.java | 12 +++- .../inputmethod/latin/StringUtils.java | 65 +++++++++++++++++++ .../AndroidWordLevelSpellCheckerSession.java | 8 ++- .../latin/RichInputConnectionTests.java | 5 ++ .../inputmethod/latin/StringUtilsTests.java | 22 +++++++ 9 files changed, 113 insertions(+), 7 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index 50e50233e..86bb25562 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -160,6 +160,8 @@ public final class Constants { public static final int CODE_DOUBLE_QUOTE = '"'; public static final int CODE_QUESTION_MARK = '?'; public static final int CODE_EXCLAMATION_MARK = '!'; + public static final int CODE_SLASH = '/'; + public static final int CODE_COMMERCIAL_AT = '@'; // TODO: Check how this should work for right-to-left languages. It seems to stand // that for rtl languages, a closing parenthesis is a left parenthesis. Is this // managed by the font? Or is it a different char? 
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index 22d189987..75c2cf2c8 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -252,7 +252,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { } private static boolean isValidName(final String name) { - if (name != null && -1 == name.indexOf('@')) { + if (name != null && -1 == name.indexOf(Constants.CODE_COMMERCIAL_AT)) { return true; } return false; diff --git a/java/src/com/android/inputmethod/latin/InputTypeUtils.java b/java/src/com/android/inputmethod/latin/InputTypeUtils.java index ecb20144b..46194f6e4 100644 --- a/java/src/com/android/inputmethod/latin/InputTypeUtils.java +++ b/java/src/com/android/inputmethod/latin/InputTypeUtils.java @@ -33,7 +33,6 @@ public final class InputTypeUtils implements InputType { private static final int[] SUPPRESSING_AUTO_SPACES_FIELD_VARIATION = { InputType.TYPE_TEXT_VARIATION_EMAIL_ADDRESS, InputType.TYPE_TEXT_VARIATION_PASSWORD, - InputType.TYPE_TEXT_VARIATION_URI, InputType.TYPE_TEXT_VARIATION_VISIBLE_PASSWORD, InputType.TYPE_TEXT_VARIATION_WEB_PASSWORD }; public static final int IME_ACTION_CUSTOM_LABEL = EditorInfo.IME_MASK_ACTION + 1; diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 0a6f2ab00..0e1c4dc31 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -2559,7 +2559,8 @@ public final class LatinIME extends InputMethodService implements KeyboardAction // This essentially inserts a space, and that's it. 
public void promotePhantomSpace() { - if (mSettings.getCurrent().shouldInsertSpacesAutomatically()) { + if (mSettings.getCurrent().shouldInsertSpacesAutomatically() + && !mConnection.textBeforeCursorLooksLikeURL()) { sendKeyCodePoint(Constants.CODE_SPACE); if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { ResearchLogger.latinIME_promotePhantomSpace(); diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index e17846618..8ed7ab264 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -19,7 +19,6 @@ package com.android.inputmethod.latin; import android.inputmethodservice.InputMethodService; import android.text.SpannableString; import android.text.TextUtils; -import android.text.style.SuggestionSpan; import android.util.Log; import android.view.KeyEvent; import android.view.inputmethod.CompletionInfo; @@ -721,4 +720,15 @@ public final class RichInputConnection { // position and the expected position, then it must be a belated update. return (newSelStart - oldSelStart) * (mCurrentCursorPosition - newSelStart) >= 0; } + + /** + * Looks at the text just before the cursor to find out if it looks like a URL. + * + * The weakest point here is, if we don't have enough text buffered, we may fail to realize + * we are in a URL situation, but other places in this class have the same limitation and it + * does not matter too much in practice.
+ */ + public boolean textBeforeCursorLooksLikeURL() { + return StringUtils.lastPartLooksLikeURL(mCommittedTextBeforeComposingText); + } } diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java index 7f1e7c619..d5ee58a63 100644 --- a/java/src/com/android/inputmethod/latin/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/StringUtils.java @@ -282,4 +282,69 @@ public final class StringUtils { } return builder.toString(); } + + /** + * Approximates whether the text before the cursor looks like a URL. + * + * This is not foolproof, but it should work well in practice. + * Essentially it walks backward from the cursor until it finds something that's not a letter, + * digit, or common URL symbol like underscore. A run with neither a period nor a slash + * does not look like a URL. + * If the text: + * - starts with www and contains a period, or contains both a period and a slash + * - starts with a slash preceded by either a slash, whitespace, or start-of-string + * Then it looks like a URL and we return true. Otherwise, we return false. + * + * Note: this method is called quite often, and should be fast. + * + * TODO: This will return that "abc./def" and ".abc/def" look like URLs to keep down the + * code complexity, but ideally it should not. It's acceptable for now. + */ + public static boolean lastPartLooksLikeURL(final CharSequence text) { + int i = text.length(); + if (0 == i) return false; + int wCount = 0; + int slashCount = 0; + boolean hasSlash = false; + boolean hasPeriod = false; + int codePoint = 0; + while (i > 0) { + codePoint = Character.codePointBefore(text, i); + if (codePoint < Constants.CODE_PERIOD || codePoint > 'z') { + // Handwavy heuristic to see if that's a URL character. Anything between period + // and z. This includes all lower- and upper-case ascii letters, period, + // underscore, at sign, question mark, equal sign. It excludes spaces, exclamation + // marks, double quotes...
+ // Anything that's not a URL-like character causes us to break from here and + // evaluate normally. + break; + } + if (Constants.CODE_PERIOD == codePoint) { + hasPeriod = true; + } + if (Constants.CODE_SLASH == codePoint) { + hasSlash = true; + if (2 == ++slashCount) { + return true; + } + } else { + slashCount = 0; + } + if ('w' == codePoint) { + ++wCount; + } else { + wCount = 0; + } + i = Character.offsetByCodePoints(text, i, -1); + } + // End of the text run. + // If it starts with www and includes a period, then it looks like a URL. + if (wCount >= 3 && hasPeriod) return true; + // If it starts with a slash, and the code point before is whitespace, it looks like a URL. + if (1 == slashCount && (0 == i || Character.isWhitespace(codePoint))) return true; + // If it has both a period and a slash, it looks like a URL. + if (hasPeriod && hasSlash) return true; + // Otherwise, it doesn't look like a URL. + return false; + } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java index 96b2c818d..da8657201 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java @@ -189,10 +189,12 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { int letterCount = 0; for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int codePoint = text.codePointAt(i); - // Any word containing a '@' is probably an e-mail address - // Any word containing a '/' is probably either an ad-hoc combination of two + // Any word containing a COMMERCIAL_AT is probably an e-mail address + // Any word containing a SLASH is probably either an ad-hoc combination of two // words or a URI - in either case we don't want to spell check that - if ('@' == codePoint || '/' ==
codePoint) return true; + if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) { + return true; + } if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; } // Guestimate heuristic: perform spell checking if at least 3/4 of the characters diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionTests.java index dc8837dab..aacd60f4d 100644 --- a/tests/src/com/android/inputmethod/latin/RichInputConnectionTests.java +++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionTests.java @@ -85,6 +85,11 @@ public class RichInputConnectionTests extends AndroidTestCase { public boolean endBatchEdit() { return true; } + + @Override + public boolean finishComposingText() { + return true; + } } private class MockInputMethodService extends InputMethodService { diff --git a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java index 98a50b730..1e3cc8ad4 100644 --- a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java @@ -215,4 +215,26 @@ public class StringUtilsTests extends AndroidTestCase { checkCapitalize("Lorem!Ipsum (dolor) Sit * Amet", "Lorem!Ipsum (Dolor) Sit * Amet", " \n,.;!?*()&", Locale.ENGLISH); } + + public void testLooksLikeURL() { + assertTrue(StringUtils.lastPartLooksLikeURL("http://www.google.")); + assertFalse(StringUtils.lastPartLooksLikeURL("word wo")); + assertTrue(StringUtils.lastPartLooksLikeURL("/etc/foo")); + assertFalse(StringUtils.lastPartLooksLikeURL("left/right")); + assertTrue(StringUtils.lastPartLooksLikeURL("www.goo")); + assertTrue(StringUtils.lastPartLooksLikeURL("www.")); + assertFalse(StringUtils.lastPartLooksLikeURL("U.S.A")); + assertFalse(StringUtils.lastPartLooksLikeURL("U.S.A.")); + assertTrue(StringUtils.lastPartLooksLikeURL("rtsp://foo.")); + 
assertTrue(StringUtils.lastPartLooksLikeURL("://")); + assertFalse(StringUtils.lastPartLooksLikeURL("abc/")); + assertTrue(StringUtils.lastPartLooksLikeURL("abc.def/ghi")); + assertFalse(StringUtils.lastPartLooksLikeURL("abc.def")); + // TODO: ideally this would not look like a URL, but to keep down the complexity of the + // code for now True is acceptable. + assertTrue(StringUtils.lastPartLooksLikeURL("abc./def")); + // TODO: ideally this would not look like a URL, but to keep down the complexity of the + // code for now True is acceptable. + assertTrue(StringUtils.lastPartLooksLikeURL(".abc/def")); + } }