Merge "Fix a bug with the Greek question mark."

This commit is contained in:
Jean Chalard 2014-09-08 08:58:26 +00:00 committed by Android (Google) Code Review
commit a1d2315499
5 changed files with 67 additions and 5 deletions

View file

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
/*
**
** Copyright 2014, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
-->
<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
<!-- Symbols that terminate sentences and require capitalization on the next char. -->
<!-- ";" is listed because in Greek it is the question mark and terminates the sentence. -->
<!-- NOTE(review): presumably this is the Greek ("el") locale override of the default ".?!" list; confirm against the file path. -->
<string name="symbols_sentence_terminators">.;!?</string>
</resources>

View file

@ -33,9 +33,14 @@
<string name="symbols_word_separators">"&#x0009;&#x0020;&#x000A;&#x00A0;"()[]{}*&amp;&lt;&gt;+=|.,;:!?/_\"</string> <string name="symbols_word_separators">"&#x0009;&#x0020;&#x000A;&#x00A0;"()[]{}*&amp;&lt;&gt;+=|.,;:!?/_\"</string>
<!-- Word connectors --> <!-- Word connectors -->
<string name="symbols_word_connectors">\'-</string> <string name="symbols_word_connectors">\'-</string>
<!-- The sentence separator code point, for capitalization --> <!-- The sentence separator code point, for capitalization and auto-insertion -->
<!-- U+002E: "." FULL STOP ; 2Eh = 46d --> <!-- U+002E: "." FULL STOP ; 2Eh = 46d -->
<integer name="sentence_separator">46</integer> <integer name="sentence_separator">46</integer>
<!-- The abbreviation marker code point -->
<!-- U+002E: "." FULL STOP ; 2Eh = 46d -->
<integer name="abbreviation_marker">46</integer>
<!-- Symbols that terminate sentences and require capitalization on the next char -->
<string name="symbols_sentence_terminators">.?!</string>
<!-- Whether this language uses spaces between words --> <!-- Whether this language uses spaces between words -->
<bool name="current_language_has_spaces">true</bool> <bool name="current_language_has_spaces">true</bool>
</resources> </resources>

View file

@ -35,6 +35,8 @@ public final class SpacingAndPunctuations {
public final int[] mSortedWordSeparators; public final int[] mSortedWordSeparators;
public final PunctuationSuggestions mSuggestPuncList; public final PunctuationSuggestions mSuggestPuncList;
private final int mSentenceSeparator; private final int mSentenceSeparator;
private final int mAbbreviationMarker;
private final int[] mSortedSentenceTerminators;
public final String mSentenceSeparatorAndSpace; public final String mSentenceSeparatorAndSpace;
public final boolean mCurrentLanguageHasSpaces; public final boolean mCurrentLanguageHasSpaces;
public final boolean mUsesAmericanTypography; public final boolean mUsesAmericanTypography;
@ -54,7 +56,10 @@ public final class SpacingAndPunctuations {
res.getString(R.string.symbols_word_connectors)); res.getString(R.string.symbols_word_connectors));
mSortedWordSeparators = StringUtils.toSortedCodePointArray( mSortedWordSeparators = StringUtils.toSortedCodePointArray(
res.getString(R.string.symbols_word_separators)); res.getString(R.string.symbols_word_separators));
mSortedSentenceTerminators = StringUtils.toSortedCodePointArray(
res.getString(R.string.symbols_sentence_terminators));
mSentenceSeparator = res.getInteger(R.integer.sentence_separator); mSentenceSeparator = res.getInteger(R.integer.sentence_separator);
mAbbreviationMarker = res.getInteger(R.integer.abbreviation_marker);
mSentenceSeparatorAndSpace = new String(new int[] { mSentenceSeparatorAndSpace = new String(new int[] {
mSentenceSeparator, Constants.CODE_SPACE }, 0, 2); mSentenceSeparator, Constants.CODE_SPACE }, 0, 2);
mCurrentLanguageHasSpaces = res.getBoolean(R.bool.current_language_has_spaces); mCurrentLanguageHasSpaces = res.getBoolean(R.bool.current_language_has_spaces);
@ -92,6 +97,14 @@ public final class SpacingAndPunctuations {
return Arrays.binarySearch(mSortedSymbolsClusteringTogether, code) >= 0; return Arrays.binarySearch(mSortedSymbolsClusteringTogether, code) >= 0;
} }
/**
 * Tells whether a code point is one of the sentence terminators for the current locale.
 *
 * The terminators come from the {@code symbols_sentence_terminators} string resource and
 * are kept as a sorted code point array, so the lookup is a binary search.
 *
 * @param code the code point to test
 * @return true if {@code code} is found in the sorted sentence terminators array
 */
public boolean isSentenceTerminator(final int code) {
    // Arrays.binarySearch returns a negative value when the key is absent.
    final int position = Arrays.binarySearch(mSortedSentenceTerminators, code);
    return position >= 0;
}
/**
 * Tells whether a code point is the abbreviation marker for the current locale.
 *
 * The marker is the single code point read from the {@code abbreviation_marker}
 * integer resource.
 *
 * @param code the code point to test
 * @return true if {@code code} equals the abbreviation marker code point
 */
public boolean isAbbreviationMarker(final int code) {
    final boolean matchesMarker = (mAbbreviationMarker == code);
    return matchesMarker;
}
public boolean isSentenceSeparator(final int code) { public boolean isSentenceSeparator(final int code) {
return code == mSentenceSeparator; return code == mSentenceSeparator;
} }

View file

@ -213,13 +213,22 @@ public final class CapsModeUtils {
char c = cs.charAt(--j); char c = cs.charAt(--j);
// We found the next interesting chunk of text ; next we need to determine if it's the // We found the next interesting chunk of text ; next we need to determine if it's the
// end of a sentence. If we have a question mark or an exclamation mark, it's the end of // end of a sentence. If we have a sentence terminator (typically a question mark or an
// a sentence. If it's neither, the only remaining case is the period so we get the opposite // exclamation mark), then it's the end of a sentence; however, we treat the abbreviation
// case out of the way. // marker specially because it usually is the same char as the sentence separator (the
if (c == Constants.CODE_QUESTION_MARK || c == Constants.CODE_EXCLAMATION_MARK) { // period in most languages) and in this case we need to apply a heuristic to determine
// in which of these senses it's used.
if (spacingAndPunctuations.isSentenceTerminator(c)
&& !spacingAndPunctuations.isAbbreviationMarker(c)) {
return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
| TextUtils.CAP_MODE_SENTENCES) & reqModes; | TextUtils.CAP_MODE_SENTENCES) & reqModes;
} }
// If we reach here, we know we have whitespace before the cursor and before that there
// is something that either does not terminate the sentence, or a symbol preceded by the
// start of the text, or it's the sentence separator AND it happens to be the same code
// point as the abbreviation marker.
// If it's a symbol or something that does not terminate the sentence, then we need to
// return caps for MODE_CHARACTERS and MODE_WORDS, but not for MODE_SENTENCES.
if (!spacingAndPunctuations.isSentenceSeparator(c) || j <= 0) { if (!spacingAndPunctuations.isSentenceSeparator(c) || j <= 0) {
return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
} }

View file

@ -136,5 +136,17 @@ public class CapsModeUtilsTests extends AndroidTestCase {
allPathsForCaps("Word; ", c | w, sp, false); allPathsForCaps("Word; ", c | w, sp, false);
allPathsForCaps("Word;", c | w, sp, true); allPathsForCaps("Word;", c | w, sp, true);
allPathsForCaps("Word;", c, sp, false); allPathsForCaps("Word;", c, sp, false);
// Test for sentence terminators in Greek
sp = job.runInLocale(res, LocaleUtils.constructLocaleFromString("el"));
allPathsForCaps("Word? ", c | w | s, sp, false);
allPathsForCaps("Word?", c | w | s, sp, true);
allPathsForCaps("Word?", c, sp, false);
allPathsForCaps("Word! ", c | w | s, sp, false);
allPathsForCaps("Word!", c | w | s, sp, true);
allPathsForCaps("Word!", c, sp, false);
// In Greek ";" is the question mark and it terminates the sentence
allPathsForCaps("Word; ", c | w | s, sp, false);
allPathsForCaps("Word;", c | w | s, sp, true);
allPathsForCaps("Word;", c, sp, false);
} }
} }