Merge "Parse escaped sequence strictly in CSV parser"

This commit is contained in:
Tadashi G. Takaoka 2012-02-02 04:11:19 -08:00 committed by Android (Google) Code Review
commit c373585f08
4 changed files with 90 additions and 91 deletions

View file

@ -54,7 +54,7 @@
<string name="more_keys_for_currency_euro">¢,£,$,¥,₱</string>
<string name="more_keys_for_currency_pound">¢,$,€,¥,₱</string>
<string name="more_keys_for_currency_general">¢,$,€,£,¥,₱</string>
<string name="more_keys_for_smiley">":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\\\\\|:-\\\\\\\\ ,:\'(|:\'( ,:-D|:-D "</string>
<string name="more_keys_for_smiley">":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\|:-\\\\ ,:\'(|:\'( ,:-D|:-D "</string>
<string name="more_keys_for_punctuation">"\\,,\?,!,:,-,\',\",(,),/,;,+,&amp;,\@"</string>
<integer name="mini_keyboard_column_for_punctuation">7</integer>
<string name="keyhintlabel_for_punctuation"></string>

View file

@ -48,7 +48,7 @@ public class KeySpecParser {
private static final char ESCAPE_CHAR = '\\';
private static final char PREFIX_AT = '@';
private static final char SUFFIX_SLASH = '/';
private static final String PREFIX_STRING = PREFIX_AT + "string";
private static final String PREFIX_STRING = PREFIX_AT + "string" + SUFFIX_SLASH;
private static final char LABEL_END = '|';
private static final String PREFIX_ICON = PREFIX_AT + "icon" + SUFFIX_SLASH;
private static final String PREFIX_CODE = PREFIX_AT + "integer" + SUFFIX_SLASH;
@ -293,13 +293,11 @@ public class KeySpecParser {
sb.append(res.getString(resId));
pos = end - 1;
} else if (c == ESCAPE_CHAR) {
pos++;
if (sb != null) {
sb.append(c);
if (pos < size) {
sb.append(text.charAt(pos));
}
// Append both escape character and escaped character.
sb.append(text.substring(pos, Math.min(pos + 2, size)));
}
pos++;
} else if (sb != null) {
sb.append(c);
}
@ -309,10 +307,7 @@ public class KeySpecParser {
private static int searchResourceNameEnd(String text, int start) {
final int size = text.length();
if (start >= size || text.charAt(start) != SUFFIX_SLASH) {
throw new RuntimeException("Resource name not specified");
}
for (int pos = start + 1; pos < size; pos++) {
for (int pos = start; pos < size; pos++) {
final char c = text.charAt(pos);
// String resource name should consist of [a-z_0-9].
if ((c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9')) {
@ -333,7 +328,6 @@ public class KeySpecParser {
return new String[] { text };
}
final StringBuilder sb = new StringBuilder();
ArrayList<String> list = null;
int start = 0;
for (int pos = 0; pos < size; pos++) {
@ -342,44 +336,18 @@ public class KeySpecParser {
if (list == null) {
list = new ArrayList<String>();
}
if (sb.length() == 0) {
list.add(text.substring(start, pos));
} else {
list.add(sb.toString());
sb.setLength(0);
}
list.add(text.substring(start, pos));
// Skip comma
start = pos + 1;
continue;
}
// TODO: Only parse escaped comma. Other escaped character should be passed through
// with escaped character prefixed.
// Skip escaped sequence.
if (c == ESCAPE_CHAR) {
if (start == pos) {
// Skip escaping comma at the beginning of the text.
start++;
pos++;
} else {
if (start < pos && sb.length() == 0) {
sb.append(text.substring(start, pos));
}
// Skip comma
pos++;
if (pos < size) {
sb.append(text.charAt(pos));
}
}
} else if (sb.length() > 0) {
sb.append(c);
} else if (c == ESCAPE_CHAR) {
// Skip escape character and escaped character.
pos++;
}
}
if (list == null) {
return new String[] {
sb.length() > 0 ? sb.toString() : text.substring(start)
};
return new String[] { text.substring(start) };
} else {
list.add(sb.length() > 0 ? sb.toString() : text.substring(start));
list.add(text.substring(start));
return list.toArray(new String[list.size()]);
}
}

View file

@ -30,11 +30,14 @@
<string name="label_surrounded_by_spaces">" abc "</string>
<string name="escaped_char">"\\a"</string>
<string name="escaped_comma">"\\,"</string>
<string name="escaped_comma_escape">"a\\,\\"</string>
<string name="escaped_escape">"\\\\"</string>
<string name="escaped_label">"a\\bc"</string>
<string name="escaped_label_at_beginning">"\\abc"</string>
<string name="escaped_label_at_end">"abc\\"</string>
<string name="escaped_label_with_comma">"a\\,c"</string>
<string name="escaped_label_with_comma_at_beginning">"\\,bc"</string>
<string name="escaped_label_with_comma_at_end">"ab\\,"</string>
<string name="escaped_label_with_successive">"\\,\\\\bc"</string>
<string name="escaped_label_with_escape">"a\\\\c"</string>
<string name="multiple_chars">"a,b,c"</string>

View file

@ -24,7 +24,7 @@ import com.android.inputmethod.latin.tests.R;
import java.util.Arrays;
public class CsvParserTests extends AndroidTestCase {
public class KeySpecParserCsvTests extends AndroidTestCase {
private Resources mTestResources;
@Override
@ -79,6 +79,7 @@ public class CsvParserTests extends AndroidTestCase {
public void testParseCsvTextSingle() {
assertTextArray("Single char", "a", "a");
assertTextArray("Surrogate pair", PAIR1, PAIR1);
assertTextArray("Single escape", "\\", "\\");
assertTextArray("Space", " ", " ");
assertTextArray("Single label", "abc", "abc");
assertTextArray("Single srrogate pairs label", SURROGATE2, SURROGATE2);
@ -98,36 +99,49 @@ public class CsvParserTests extends AndroidTestCase {
"ab" + SURROGATE1 + "cd");
assertTextArray("Incomplete resource reference 1", "string", "string");
assertTextArray("Incomplete resource reference 2", "@strin", "@strin");
assertTextArray("Incomplete resource reference 3", "@" + SURROGATE2, "@" + SURROGATE2);
assertTextArray("Incomplete resource reference 2", "@string", "@string");
assertTextArray("Incomplete resource reference 3", "string/", "string/");
assertTextArray("Incomplete resource reference 4", "@" + SURROGATE2, "@" + SURROGATE2);
}
public void testParseCsvTextSingleEscaped() {
assertTextArray("Escaped char", "\\a", "a");
assertTextArray("Escaped surrogate pair", "\\" + PAIR1, PAIR1);
assertTextArray("Escaped comma", "\\,", ",");
assertTextArray("Escaped escape", "\\\\", "\\");
assertTextArray("Escaped label", "a\\bc", "abc");
assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a" + PAIR1 + "c");
assertTextArray("Escaped label at beginning", "\\abc", "abc");
assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, SURROGATE2);
assertTextArray("Escaped label with comma", "a\\,c", "a,c");
assertTextArray("Escaped surrogate with comma", PAIR1 + "\\," + PAIR2, PAIR1 + "," + PAIR2);
assertTextArray("Escaped label with comma at beginning", "\\,bc", ",bc");
assertTextArray("Escaped char", "\\a", "\\a");
assertTextArray("Escaped surrogate pair", "\\" + PAIR1, "\\" + PAIR1);
assertTextArray("Escaped comma", "\\,", "\\,");
assertTextArray("Escaped comma escape", "a\\,\\", "a\\,\\");
assertTextArray("Escaped escape", "\\\\", "\\\\");
assertTextArray("Escaped label", "a\\bc", "a\\bc");
assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a\\" + PAIR1 + "c");
assertTextArray("Escaped label at beginning", "\\abc", "\\abc");
assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, "\\" + SURROGATE2);
assertTextArray("Escaped label at end", "abc\\", "abc\\");
assertTextArray("Escaped surrogate at end", SURROGATE2 + "\\", SURROGATE2 + "\\");
assertTextArray("Escaped label with comma", "a\\,c", "a\\,c");
assertTextArray("Escaped surrogate with comma",
PAIR1 + "\\," + PAIR2, PAIR1 + "\\," + PAIR2);
assertTextArray("Escaped label with comma at beginning", "\\,bc", "\\,bc");
assertTextArray("Escaped surrogate with comma at beginning",
"\\," + SURROGATE1, "," + SURROGATE1);
assertTextArray("Escaped label with successive", "\\,\\\\bc", ",\\bc");
"\\," + SURROGATE1, "\\," + SURROGATE1);
assertTextArray("Escaped label with comma at end", "ab\\,", "ab\\,");
assertTextArray("Escaped surrogate with comma at end",
SURROGATE2 + "\\,", SURROGATE2 + "\\,");
assertTextArray("Escaped label with successive", "\\,\\\\bc", "\\,\\\\bc");
assertTextArray("Escaped surrogate with successive",
"\\,\\\\" + SURROGATE1, ",\\" + SURROGATE1);
assertTextArray("Escaped label with escape", "a\\\\c", "a\\c");
"\\,\\\\" + SURROGATE1, "\\,\\\\" + SURROGATE1);
assertTextArray("Escaped label with escape", "a\\\\c", "a\\\\c");
assertTextArray("Escaped surrogate with escape",
PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\" + PAIR2);
PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\\\" + PAIR2);
assertTextArray("Escaped @string", "\\@string/empty_string", "@string/empty_string");
assertTextArray("Escaped @string", "\\@string", "\\@string");
assertTextArray("Escaped @string/", "\\@string/", "\\@string/");
assertTextArray("Escaped @string/", "\\@string/empty_string", "\\@string/empty_string");
}
public void testParseCsvTextMulti() {
assertTextArray("Multiple chars", "a,b,c", "a", "b", "c");
assertTextArray("Multiple chars", "a,b,\\c", "a", "b", "\\c");
assertTextArray("Multiple chars and escape at beginning and end",
"\\a,b,\\c\\", "\\a", "b", "\\c\\");
assertTextArray("Multiple surrogates", PAIR1 + "," + PAIR2 + "," + PAIR3,
PAIR1, PAIR2, PAIR3);
assertTextArray("Multiple chars surrounded by spaces", " a , b , c ", " a ", " b ", " c ");
@ -139,24 +153,24 @@ public class CsvParserTests extends AndroidTestCase {
}
public void testParseCsvTextMultiEscaped() {
assertTextArray("Multiple chars with comma", "a,\\,,c", "a", ",", "c");
assertTextArray("Multiple chars with comma", "a,\\,,c", "a", "\\,", "c");
assertTextArray("Multiple chars with comma surrounded by spaces", " a , \\, , c ",
" a ", " , ", " c ");
assertTextArray("Multiple labels with escape", "\\abc,d\\ef,gh\\i", "abc", "def", "ghi");
" a ", " \\, ", " c ");
assertTextArray("Multiple labels with escape",
"\\abc,d\\ef,gh\\i", "\\abc", "d\\ef", "gh\\i");
assertTextArray("Multiple labels with escape surrounded by spaces",
" \\abc , d\\ef , gh\\i ", " abc ", " def ", " ghi ");
" \\abc , d\\ef , gh\\i ", " \\abc ", " d\\ef ", " gh\\i ");
assertTextArray("Multiple labels with comma and escape",
"ab\\\\,d\\\\\\,,g\\,i", "ab\\", "d\\,", "g,i");
"ab\\\\,d\\\\\\,,g\\,i", "ab\\\\", "d\\\\\\,", "g\\,i");
assertTextArray("Multiple labels with comma and escape surrounded by spaces",
" ab\\\\ , d\\\\\\, , g\\,i ", " ab\\ ", " d\\, ", " g,i ");
" ab\\\\ , d\\\\\\, , g\\,i ", " ab\\\\ ", " d\\\\\\, ", " g\\,i ");
assertTextArray("Multiple escaped @string", "\\@,\\@string/empty_string",
"@", "@string/empty_string");
"\\@", "\\@string/empty_string");
}
public void testParseCsvResourceError() {
assertError("Incomplete resource name 1", "@string", "@string");
assertError("Incomplete resource name 2", "@string/", "@string/");
assertError("Incomplete resource name", "@string/", "@string/");
assertError("Non existing resource", "@string/non_existing");
}
@ -182,27 +196,36 @@ public class CsvParserTests extends AndroidTestCase {
"@string/spaces_at_end_of_label", "abc ");
assertTextArray("label surrounded by spaces",
"@string/label_surrounded_by_spaces", " abc ");
assertTextArray("Escape and single char",
"\\\\@string/single_char", "\\\\a");
}
public void testParseCsvResourceSingleEscaped() {
assertTextArray("Escaped char",
"@string/escaped_char", "a");
"@string/escaped_char", "\\a");
assertTextArray("Escaped comma",
"@string/escaped_comma", ",");
"@string/escaped_comma", "\\,");
assertTextArray("Escaped comma escape",
"@string/escaped_comma_escape", "a\\,\\");
assertTextArray("Escaped escape",
"@string/escaped_escape", "\\");
"@string/escaped_escape", "\\\\");
assertTextArray("Escaped label",
"@string/escaped_label", "abc");
"@string/escaped_label", "a\\bc");
assertTextArray("Escaped label at beginning",
"@string/escaped_label_at_beginning", "abc");
"@string/escaped_label_at_beginning", "\\abc");
assertTextArray("Escaped label at end",
"@string/escaped_label_at_end", "abc\\");
assertTextArray("Escaped label with comma",
"@string/escaped_label_with_comma", "a,c");
"@string/escaped_label_with_comma", "a\\,c");
assertTextArray("Escaped label with comma at beginning",
"@string/escaped_label_with_comma_at_beginning", ",bc");
"@string/escaped_label_with_comma_at_beginning", "\\,bc");
assertTextArray("Escaped label with comma at end",
"@string/escaped_label_with_comma_at_end", "ab\\,");
assertTextArray("Escaped label with successive",
"@string/escaped_label_with_successive", ",\\bc");
"@string/escaped_label_with_successive", "\\,\\\\bc");
assertTextArray("Escaped label with escape",
"@string/escaped_label_with_escape", "a\\c");
"@string/escaped_label_with_escape", "a\\\\c");
}
public void testParseCsvResourceMulti() {
@ -220,36 +243,41 @@ public class CsvParserTests extends AndroidTestCase {
public void testParseCsvResourcetMultiEscaped() {
assertTextArray("Multiple chars with comma",
"@string/multiple_chars_with_comma",
"a", ",", "c");
"a", "\\,", "c");
assertTextArray("Multiple chars with comma surrounded by spaces",
"@string/multiple_chars_with_comma_surrounded_by_spaces",
" a ", " , ", " c ");
" a ", " \\, ", " c ");
assertTextArray("Multiple labels with escape",
"@string/multiple_labels_with_escape",
"abc", "def", "ghi");
"\\abc", "d\\ef", "gh\\i");
assertTextArray("Multiple labels with escape surrounded by spaces",
"@string/multiple_labels_with_escape_surrounded_by_spaces",
" abc ", " def ", " ghi ");
" \\abc ", " d\\ef ", " gh\\i ");
assertTextArray("Multiple labels with comma and escape",
"@string/multiple_labels_with_comma_and_escape",
"ab\\", "d\\,", "g,i");
"ab\\\\", "d\\\\\\,", "g\\,i");
assertTextArray("Multiple labels with comma and escape surrounded by spaces",
"@string/multiple_labels_with_comma_and_escape_surrounded_by_spaces",
" ab\\ ", " d\\, ", " g,i ");
" ab\\\\ ", " d\\\\\\, ", " g\\,i ");
}
public void testParseMultipleResources() {
assertTextArray("Literals and resources",
"1,@string/multiple_chars,z", "1", "a", "b", "c", "z");
assertTextArray("Literals and resources and escape at end",
"\\1,@string/multiple_chars,z\\", "\\1", "a", "b", "c", "z\\");
assertTextArray("Multiple single resource chars and labels",
"@string/single_char,@string/single_label,@string/escaped_comma",
"a", "abc", ",");
"a", "abc", "\\,");
assertTextArray("Multiple single resource chars and labels 2",
"@string/single_char,@string/single_label,@string/escaped_comma_escape",
"a", "abc", "a\\,\\");
assertTextArray("Multiple multiple resource chars and labels",
"@string/multiple_chars,@string/multiple_labels,@string/multiple_chars_with_comma",
"a", "b", "c", "abc", "def", "ghi", "a", ",", "c");
"a", "b", "c", "abc", "def", "ghi", "a", "\\,", "c");
assertTextArray("Concatenated resources",
"@string/multiple_chars@string/multiple_labels@string/multiple_chars_with_comma",
"a", "b", "cabc", "def", "ghia", ",", "c");
"a", "b", "cabc", "def", "ghia", "\\,", "c");
assertTextArray("Concatenated resource and literal",
"abc@string/multiple_labels",
"abcabc", "def", "ghi");