Merge "Don't insert automatic spaces when text looks like a URL"

This commit is contained in:
Jean Chalard 2013-04-16 10:07:36 +00:00 committed by Android (Google) Code Review
commit ff766f8c4b
9 changed files with 113 additions and 7 deletions

View file

@ -160,6 +160,8 @@ public final class Constants {
// Code points of individual punctuation characters, each held as an int.
public static final int CODE_DOUBLE_QUOTE = '"';
public static final int CODE_QUESTION_MARK = '?';
public static final int CODE_EXCLAMATION_MARK = '!';
public static final int CODE_SLASH = '/';
// "Commercial at" is the '@' sign.
public static final int CODE_COMMERCIAL_AT = '@';
// TODO: Check how this should work for right-to-left languages. It seems to stand
// that for rtl languages, a closing parenthesis is a left parenthesis. Is this
// managed by the font? Or is it a different char?

View file

@ -252,7 +252,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
}
private static boolean isValidName(final String name) {
if (name != null && -1 == name.indexOf('@')) {
if (name != null && -1 == name.indexOf(Constants.CODE_COMMERCIAL_AT)) {
return true;
}
return false;

View file

@ -33,7 +33,6 @@ public final class InputTypeUtils implements InputType {
private static final int[] SUPPRESSING_AUTO_SPACES_FIELD_VARIATION = {
InputType.TYPE_TEXT_VARIATION_EMAIL_ADDRESS,
InputType.TYPE_TEXT_VARIATION_PASSWORD,
InputType.TYPE_TEXT_VARIATION_URI,
InputType.TYPE_TEXT_VARIATION_VISIBLE_PASSWORD,
InputType.TYPE_TEXT_VARIATION_WEB_PASSWORD };
public static final int IME_ACTION_CUSTOM_LABEL = EditorInfo.IME_MASK_ACTION + 1;

View file

@ -2559,7 +2559,8 @@ public final class LatinIME extends InputMethodService implements KeyboardAction
// This essentially inserts a space, and that's it.
public void promotePhantomSpace() {
if (mSettings.getCurrent().shouldInsertSpacesAutomatically()) {
if (mSettings.getCurrent().shouldInsertSpacesAutomatically()
&& !mConnection.textBeforeCursorLooksLikeURL()) {
sendKeyCodePoint(Constants.CODE_SPACE);
if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) {
ResearchLogger.latinIME_promotePhantomSpace();

View file

@ -19,7 +19,6 @@ package com.android.inputmethod.latin;
import android.inputmethodservice.InputMethodService;
import android.text.SpannableString;
import android.text.TextUtils;
import android.text.style.SuggestionSpan;
import android.util.Log;
import android.view.KeyEvent;
import android.view.inputmethod.CompletionInfo;
@ -721,4 +720,15 @@ public final class RichInputConnection {
// position and the expected position, then it must be a belated update.
return (newSelStart - oldSelStart) * (mCurrentCursorPosition - newSelStart) >= 0;
}
/**
 * Reports whether the text sitting right before the cursor resembles a URL.
 *
 * The decision is delegated to StringUtils.lastPartLooksLikeURL, which is handed the
 * contents of the mCommittedTextBeforeComposingText field.
 *
 * Known weak spot: when not enough text has been buffered, a URL may go unnoticed. Other
 * places in this class share the same limitation, and it matters little in practice.
 *
 * @return true if the text before the cursor looks like a URL, false otherwise.
 */
public boolean textBeforeCursorLooksLikeURL() {
    final CharSequence textBeforeCursor = mCommittedTextBeforeComposingText;
    return StringUtils.lastPartLooksLikeURL(textBeforeCursor);
}
}

View file

@ -282,4 +282,69 @@ public final class StringUtils {
}
return builder.toString();
}
/**
 * Heuristically decides whether the tail of the given text looks like a URL.
 *
 * The text is scanned backward from its end, one code point at a time, for as long as the
 * code points fall in the range from period to lower-case z. That range covers ASCII letters
 * and digits, period, slash, colon, underscore, at sign, question mark and equal sign; it
 * leaves out spaces, exclamation marks, double quotes and hyphens, among others.
 *
 * The result is true when any of the following holds:
 * - two slashes in a row are met during the scan (the scan stops right there);
 * - the scanned run begins with at least three 'w' and contains a period;
 * - the scanned run begins with a single slash that sits at the very start of the text or
 *   right after whitespace;
 * - the scanned run contains both a period and a slash.
 * In every other case, including empty text, the result is false.
 *
 * Note: this method is called quite often, and should be fast.
 *
 * TODO: "abc./def" and ".abc/def" are reported as URLs to keep the code simple, although
 * ideally they should not be. That is acceptable for now.
 *
 * @param text the text ending at the cursor; must not be null.
 * @return whether the last part of the text looks like a URL.
 */
public static boolean lastPartLooksLikeURL(final CharSequence text) {
    final int length = text.length();
    if (length == 0) {
        return false;
    }
    int leadingWs = 0;           // Run of consecutive 'w' at the current scan position.
    int consecutiveSlashes = 0;  // Run of consecutive '/' at the current scan position.
    boolean sawSlash = false;
    boolean sawPeriod = false;
    int current = 0;
    int pos = length;
    // Walk backward; the update step is skipped when we break out on a non-URL character,
    // so pos still points just after that character in that case.
    for (; pos > 0; pos = Character.offsetByCodePoints(text, pos, -1)) {
        current = Character.codePointBefore(text, pos);
        // Rough test for "URL character": anything from period up to lower-case z.
        final boolean urlLike = current >= Constants.CODE_PERIOD && current <= 'z';
        if (!urlLike) {
            break;
        }
        if (current == Constants.CODE_PERIOD) {
            sawPeriod = true;
        }
        if (current == Constants.CODE_SLASH) {
            sawSlash = true;
            consecutiveSlashes++;
            if (consecutiveSlashes == 2) {
                // A double slash is enough on its own.
                return true;
            }
        } else {
            consecutiveSlashes = 0;
        }
        leadingWs = (current == 'w') ? leadingWs + 1 : 0;
    }
    // The scanned run is over; judge it by how it starts and what it contains.
    final boolean startsWithWww = leadingWs >= 3 && sawPeriod;
    final boolean startsWithLoneSlash = consecutiveSlashes == 1
            && (pos == 0 || Character.isWhitespace(current));
    final boolean hasPeriodAndSlash = sawPeriod && sawSlash;
    return startsWithWww || startsWithLoneSlash || hasPeriodAndSlash;
}
}

View file

@ -189,10 +189,12 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
int letterCount = 0;
for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
final int codePoint = text.codePointAt(i);
// Any word containing a '@' is probably an e-mail address
// Any word containing a '/' is probably either an ad-hoc combination of two
// Any word containing a COMMERCIAL_AT is probably an e-mail address
// Any word containing a SLASH is probably either an ad-hoc combination of two
// words or a URI - in either case we don't want to spell check that
if ('@' == codePoint || '/' == codePoint) return true;
if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
return true;
}
if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount;
}
// Guestimate heuristic: perform spell checking if at least 3/4 of the characters

View file

@ -85,6 +85,11 @@ public class RichInputConnectionTests extends AndroidTestCase {
public boolean endBatchEdit() {
return true;
}
// Stub override: takes no action and always returns true.
@Override
public boolean finishComposingText() {
return true;
}
}
private class MockInputMethodService extends InputMethodService {

View file

@ -215,4 +215,26 @@ public class StringUtilsTests extends AndroidTestCase {
checkCapitalize("Lorem!Ipsum (dolor) Sit * Amet", "Lorem!Ipsum (Dolor) Sit * Amet",
" \n,.;!?*()&", Locale.ENGLISH);
}
/**
 * Checks StringUtils.lastPartLooksLikeURL against samples that must be reported as URL-like
 * and samples that must not.
 */
public void testLooksLikeURL() {
    final String[] urlLike = {
        "http://www.google.",
        "/etc/foo",
        "www.goo",
        "www.",
        "rtsp://foo.",
        "://",
        "abc.def/ghi",
        // TODO: ideally the next two would not look like a URL, but to keep down the
        // complexity of the code for now True is acceptable.
        "abc./def",
        ".abc/def",
    };
    final String[] notUrlLike = {
        "word wo",
        "left/right",
        "U.S.A",
        "U.S.A.",
        "abc/",
        "abc.def",
    };
    for (final String sample : urlLike) {
        assertTrue(StringUtils.lastPartLooksLikeURL(sample));
    }
    for (final String sample : notUrlLike) {
        assertFalse(StringUtils.lastPartLooksLikeURL(sample));
    }
}
}