Parse escape sequences strictly in CSV parser

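This change alters the CSV parser's behavior. The parser now resolves only
string resource references and leaves every other escape sequence untouched:
  * A string resource reference is resolved. ["@string/res" -> "<content_of_res>"]
  * Any other occurrence of an escape sequence is left intact. ["\x" -> "\x"]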

Before this change, an escape sequence in a moreKeys string was parsed three
times: first when parsing the string resource, next in the CSV parser, and
finally in KeySpecParser. As a result, representing a single escape character
itself was a bit annoying: "\\\\\\\\".

Now a single escape character itself can be represented in a string resource by "\\\\".

Change-Id: Ib978e17b779cc82585eed8241ac3857508b14bc7
main
Tadashi G. Takaoka 2012-02-02 17:25:07 +09:00
parent a456c755ee
commit 5852a2594f
4 changed files with 90 additions and 91 deletions

View File

@ -54,7 +54,7 @@
<string name="more_keys_for_currency_euro">¢,£,$,¥,₱</string> <string name="more_keys_for_currency_euro">¢,£,$,¥,₱</string>
<string name="more_keys_for_currency_pound">¢,$,€,¥,₱</string> <string name="more_keys_for_currency_pound">¢,$,€,¥,₱</string>
<string name="more_keys_for_currency_general">¢,$,€,£,¥,₱</string> <string name="more_keys_for_currency_general">¢,$,€,£,¥,₱</string>
<string name="more_keys_for_smiley">":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\\\\\|:-\\\\\\\\ ,:\'(|:\'( ,:-D|:-D "</string> <string name="more_keys_for_smiley">":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\|:-\\\\ ,:\'(|:\'( ,:-D|:-D "</string>
<string name="more_keys_for_punctuation">"\\,,\?,!,:,-,\',\",(,),/,;,+,&amp;,\@"</string> <string name="more_keys_for_punctuation">"\\,,\?,!,:,-,\',\",(,),/,;,+,&amp;,\@"</string>
<integer name="mini_keyboard_column_for_punctuation">7</integer> <integer name="mini_keyboard_column_for_punctuation">7</integer>
<string name="keyhintlabel_for_punctuation"></string> <string name="keyhintlabel_for_punctuation"></string>

View File

@ -48,7 +48,7 @@ public class KeySpecParser {
private static final char ESCAPE_CHAR = '\\'; private static final char ESCAPE_CHAR = '\\';
private static final char PREFIX_AT = '@'; private static final char PREFIX_AT = '@';
private static final char SUFFIX_SLASH = '/'; private static final char SUFFIX_SLASH = '/';
private static final String PREFIX_STRING = PREFIX_AT + "string"; private static final String PREFIX_STRING = PREFIX_AT + "string" + SUFFIX_SLASH;
private static final char LABEL_END = '|'; private static final char LABEL_END = '|';
private static final String PREFIX_ICON = PREFIX_AT + "icon" + SUFFIX_SLASH; private static final String PREFIX_ICON = PREFIX_AT + "icon" + SUFFIX_SLASH;
private static final String PREFIX_CODE = PREFIX_AT + "integer" + SUFFIX_SLASH; private static final String PREFIX_CODE = PREFIX_AT + "integer" + SUFFIX_SLASH;
@ -293,13 +293,11 @@ public class KeySpecParser {
sb.append(res.getString(resId)); sb.append(res.getString(resId));
pos = end - 1; pos = end - 1;
} else if (c == ESCAPE_CHAR) { } else if (c == ESCAPE_CHAR) {
pos++;
if (sb != null) { if (sb != null) {
sb.append(c); // Append both escape character and escaped character.
if (pos < size) { sb.append(text.substring(pos, Math.min(pos + 2, size)));
sb.append(text.charAt(pos));
}
} }
pos++;
} else if (sb != null) { } else if (sb != null) {
sb.append(c); sb.append(c);
} }
@ -309,10 +307,7 @@ public class KeySpecParser {
private static int searchResourceNameEnd(String text, int start) { private static int searchResourceNameEnd(String text, int start) {
final int size = text.length(); final int size = text.length();
if (start >= size || text.charAt(start) != SUFFIX_SLASH) { for (int pos = start; pos < size; pos++) {
throw new RuntimeException("Resource name not specified");
}
for (int pos = start + 1; pos < size; pos++) {
final char c = text.charAt(pos); final char c = text.charAt(pos);
// String resource name should be consisted of [a-z_0-9]. // String resource name should be consisted of [a-z_0-9].
if ((c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9')) { if ((c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9')) {
@ -333,7 +328,6 @@ public class KeySpecParser {
return new String[] { text }; return new String[] { text };
} }
final StringBuilder sb = new StringBuilder();
ArrayList<String> list = null; ArrayList<String> list = null;
int start = 0; int start = 0;
for (int pos = 0; pos < size; pos++) { for (int pos = 0; pos < size; pos++) {
@ -342,44 +336,18 @@ public class KeySpecParser {
if (list == null) { if (list == null) {
list = new ArrayList<String>(); list = new ArrayList<String>();
} }
if (sb.length() == 0) {
list.add(text.substring(start, pos)); list.add(text.substring(start, pos));
} else {
list.add(sb.toString());
sb.setLength(0);
}
// Skip comma // Skip comma
start = pos + 1; start = pos + 1;
continue; } else if (c == ESCAPE_CHAR) {
} // Skip escape character and escaped character.
// TODO: Only parse escaped comma. Other escaped character should be passed through
// with escaped character prefixed.
// Skip escaped sequence.
if (c == ESCAPE_CHAR) {
if (start == pos) {
// Skip escaping comma at the beginning of the text.
start++;
pos++; pos++;
} else {
if (start < pos && sb.length() == 0) {
sb.append(text.substring(start, pos));
}
// Skip comma
pos++;
if (pos < size) {
sb.append(text.charAt(pos));
}
}
} else if (sb.length() > 0) {
sb.append(c);
} }
} }
if (list == null) { if (list == null) {
return new String[] { return new String[] { text.substring(start) };
sb.length() > 0 ? sb.toString() : text.substring(start)
};
} else { } else {
list.add(sb.length() > 0 ? sb.toString() : text.substring(start)); list.add(text.substring(start));
return list.toArray(new String[list.size()]); return list.toArray(new String[list.size()]);
} }
} }

View File

@ -30,11 +30,14 @@
<string name="label_surrounded_by_spaces">" abc "</string> <string name="label_surrounded_by_spaces">" abc "</string>
<string name="escaped_char">"\\a"</string> <string name="escaped_char">"\\a"</string>
<string name="escaped_comma">"\\,"</string> <string name="escaped_comma">"\\,"</string>
<string name="escaped_comma_escape">"a\\,\\"</string>
<string name="escaped_escape">"\\\\"</string> <string name="escaped_escape">"\\\\"</string>
<string name="escaped_label">"a\\bc"</string> <string name="escaped_label">"a\\bc"</string>
<string name="escaped_label_at_beginning">"\\abc"</string> <string name="escaped_label_at_beginning">"\\abc"</string>
<string name="escaped_label_at_end">"abc\\"</string>
<string name="escaped_label_with_comma">"a\\,c"</string> <string name="escaped_label_with_comma">"a\\,c"</string>
<string name="escaped_label_with_comma_at_beginning">"\\,bc"</string> <string name="escaped_label_with_comma_at_beginning">"\\,bc"</string>
<string name="escaped_label_with_comma_at_end">"ab\\,"</string>
<string name="escaped_label_with_successive">"\\,\\\\bc"</string> <string name="escaped_label_with_successive">"\\,\\\\bc"</string>
<string name="escaped_label_with_escape">"a\\\\c"</string> <string name="escaped_label_with_escape">"a\\\\c"</string>
<string name="multiple_chars">"a,b,c"</string> <string name="multiple_chars">"a,b,c"</string>

View File

@ -24,7 +24,7 @@ import com.android.inputmethod.latin.tests.R;
import java.util.Arrays; import java.util.Arrays;
public class CsvParserTests extends AndroidTestCase { public class KeySpecParserCsvTests extends AndroidTestCase {
private Resources mTestResources; private Resources mTestResources;
@Override @Override
@ -79,6 +79,7 @@ public class CsvParserTests extends AndroidTestCase {
public void testParseCsvTextSingle() { public void testParseCsvTextSingle() {
assertTextArray("Single char", "a", "a"); assertTextArray("Single char", "a", "a");
assertTextArray("Surrogate pair", PAIR1, PAIR1); assertTextArray("Surrogate pair", PAIR1, PAIR1);
assertTextArray("Single escape", "\\", "\\");
assertTextArray("Space", " ", " "); assertTextArray("Space", " ", " ");
assertTextArray("Single label", "abc", "abc"); assertTextArray("Single label", "abc", "abc");
assertTextArray("Single srrogate pairs label", SURROGATE2, SURROGATE2); assertTextArray("Single srrogate pairs label", SURROGATE2, SURROGATE2);
@ -98,36 +99,49 @@ public class CsvParserTests extends AndroidTestCase {
"ab" + SURROGATE1 + "cd"); "ab" + SURROGATE1 + "cd");
assertTextArray("Incomplete resource reference 1", "string", "string"); assertTextArray("Incomplete resource reference 1", "string", "string");
assertTextArray("Incomplete resource reference 2", "@strin", "@strin"); assertTextArray("Incomplete resource reference 2", "@string", "@string");
assertTextArray("Incomplete resource reference 3", "@" + SURROGATE2, "@" + SURROGATE2); assertTextArray("Incomplete resource reference 3", "string/", "string/");
assertTextArray("Incomplete resource reference 4", "@" + SURROGATE2, "@" + SURROGATE2);
} }
public void testParseCsvTextSingleEscaped() { public void testParseCsvTextSingleEscaped() {
assertTextArray("Escaped char", "\\a", "a"); assertTextArray("Escaped char", "\\a", "\\a");
assertTextArray("Escaped surrogate pair", "\\" + PAIR1, PAIR1); assertTextArray("Escaped surrogate pair", "\\" + PAIR1, "\\" + PAIR1);
assertTextArray("Escaped comma", "\\,", ","); assertTextArray("Escaped comma", "\\,", "\\,");
assertTextArray("Escaped escape", "\\\\", "\\"); assertTextArray("Escaped comma escape", "a\\,\\", "a\\,\\");
assertTextArray("Escaped label", "a\\bc", "abc"); assertTextArray("Escaped escape", "\\\\", "\\\\");
assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a" + PAIR1 + "c"); assertTextArray("Escaped label", "a\\bc", "a\\bc");
assertTextArray("Escaped label at beginning", "\\abc", "abc"); assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a\\" + PAIR1 + "c");
assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, SURROGATE2); assertTextArray("Escaped label at beginning", "\\abc", "\\abc");
assertTextArray("Escaped label with comma", "a\\,c", "a,c"); assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, "\\" + SURROGATE2);
assertTextArray("Escaped surrogate with comma", PAIR1 + "\\," + PAIR2, PAIR1 + "," + PAIR2); assertTextArray("Escaped label at end", "abc\\", "abc\\");
assertTextArray("Escaped label with comma at beginning", "\\,bc", ",bc"); assertTextArray("Escaped surrogate at end", SURROGATE2 + "\\", SURROGATE2 + "\\");
assertTextArray("Escaped label with comma", "a\\,c", "a\\,c");
assertTextArray("Escaped surrogate with comma",
PAIR1 + "\\," + PAIR2, PAIR1 + "\\," + PAIR2);
assertTextArray("Escaped label with comma at beginning", "\\,bc", "\\,bc");
assertTextArray("Escaped surrogate with comma at beginning", assertTextArray("Escaped surrogate with comma at beginning",
"\\," + SURROGATE1, "," + SURROGATE1); "\\," + SURROGATE1, "\\," + SURROGATE1);
assertTextArray("Escaped label with successive", "\\,\\\\bc", ",\\bc"); assertTextArray("Escaped label with comma at end", "ab\\,", "ab\\,");
assertTextArray("Escaped surrogate with comma at end",
SURROGATE2 + "\\,", SURROGATE2 + "\\,");
assertTextArray("Escaped label with successive", "\\,\\\\bc", "\\,\\\\bc");
assertTextArray("Escaped surrogate with successive", assertTextArray("Escaped surrogate with successive",
"\\,\\\\" + SURROGATE1, ",\\" + SURROGATE1); "\\,\\\\" + SURROGATE1, "\\,\\\\" + SURROGATE1);
assertTextArray("Escaped label with escape", "a\\\\c", "a\\c"); assertTextArray("Escaped label with escape", "a\\\\c", "a\\\\c");
assertTextArray("Escaped surrogate with escape", assertTextArray("Escaped surrogate with escape",
PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\" + PAIR2); PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\\\" + PAIR2);
assertTextArray("Escaped @string", "\\@string/empty_string", "@string/empty_string"); assertTextArray("Escaped @string", "\\@string", "\\@string");
assertTextArray("Escaped @string/", "\\@string/", "\\@string/");
assertTextArray("Escaped @string/", "\\@string/empty_string", "\\@string/empty_string");
} }
public void testParseCsvTextMulti() { public void testParseCsvTextMulti() {
assertTextArray("Multiple chars", "a,b,c", "a", "b", "c"); assertTextArray("Multiple chars", "a,b,c", "a", "b", "c");
assertTextArray("Multiple chars", "a,b,\\c", "a", "b", "\\c");
assertTextArray("Multiple chars and escape at beginning and end",
"\\a,b,\\c\\", "\\a", "b", "\\c\\");
assertTextArray("Multiple surrogates", PAIR1 + "," + PAIR2 + "," + PAIR3, assertTextArray("Multiple surrogates", PAIR1 + "," + PAIR2 + "," + PAIR3,
PAIR1, PAIR2, PAIR3); PAIR1, PAIR2, PAIR3);
assertTextArray("Multiple chars surrounded by spaces", " a , b , c ", " a ", " b ", " c "); assertTextArray("Multiple chars surrounded by spaces", " a , b , c ", " a ", " b ", " c ");
@ -139,24 +153,24 @@ public class CsvParserTests extends AndroidTestCase {
} }
public void testParseCsvTextMultiEscaped() { public void testParseCsvTextMultiEscaped() {
assertTextArray("Multiple chars with comma", "a,\\,,c", "a", ",", "c"); assertTextArray("Multiple chars with comma", "a,\\,,c", "a", "\\,", "c");
assertTextArray("Multiple chars with comma surrounded by spaces", " a , \\, , c ", assertTextArray("Multiple chars with comma surrounded by spaces", " a , \\, , c ",
" a ", " , ", " c "); " a ", " \\, ", " c ");
assertTextArray("Multiple labels with escape", "\\abc,d\\ef,gh\\i", "abc", "def", "ghi"); assertTextArray("Multiple labels with escape",
"\\abc,d\\ef,gh\\i", "\\abc", "d\\ef", "gh\\i");
assertTextArray("Multiple labels with escape surrounded by spaces", assertTextArray("Multiple labels with escape surrounded by spaces",
" \\abc , d\\ef , gh\\i ", " abc ", " def ", " ghi "); " \\abc , d\\ef , gh\\i ", " \\abc ", " d\\ef ", " gh\\i ");
assertTextArray("Multiple labels with comma and escape", assertTextArray("Multiple labels with comma and escape",
"ab\\\\,d\\\\\\,,g\\,i", "ab\\", "d\\,", "g,i"); "ab\\\\,d\\\\\\,,g\\,i", "ab\\\\", "d\\\\\\,", "g\\,i");
assertTextArray("Multiple labels with comma and escape surrounded by spaces", assertTextArray("Multiple labels with comma and escape surrounded by spaces",
" ab\\\\ , d\\\\\\, , g\\,i ", " ab\\ ", " d\\, ", " g,i "); " ab\\\\ , d\\\\\\, , g\\,i ", " ab\\\\ ", " d\\\\\\, ", " g\\,i ");
assertTextArray("Multiple escaped @string", "\\@,\\@string/empty_string", assertTextArray("Multiple escaped @string", "\\@,\\@string/empty_string",
"@", "@string/empty_string"); "\\@", "\\@string/empty_string");
} }
public void testParseCsvResourceError() { public void testParseCsvResourceError() {
assertError("Incomplete resource name 1", "@string", "@string"); assertError("Incomplete resource name", "@string/", "@string/");
assertError("Incomplete resource name 2", "@string/", "@string/");
assertError("Non existing resource", "@string/non_existing"); assertError("Non existing resource", "@string/non_existing");
} }
@ -182,27 +196,36 @@ public class CsvParserTests extends AndroidTestCase {
"@string/spaces_at_end_of_label", "abc "); "@string/spaces_at_end_of_label", "abc ");
assertTextArray("label surrounded by spaces", assertTextArray("label surrounded by spaces",
"@string/label_surrounded_by_spaces", " abc "); "@string/label_surrounded_by_spaces", " abc ");
assertTextArray("Escape and single char",
"\\\\@string/single_char", "\\\\a");
} }
public void testParseCsvResourceSingleEscaped() { public void testParseCsvResourceSingleEscaped() {
assertTextArray("Escaped char", assertTextArray("Escaped char",
"@string/escaped_char", "a"); "@string/escaped_char", "\\a");
assertTextArray("Escaped comma", assertTextArray("Escaped comma",
"@string/escaped_comma", ","); "@string/escaped_comma", "\\,");
assertTextArray("Escaped comma escape",
"@string/escaped_comma_escape", "a\\,\\");
assertTextArray("Escaped escape", assertTextArray("Escaped escape",
"@string/escaped_escape", "\\"); "@string/escaped_escape", "\\\\");
assertTextArray("Escaped label", assertTextArray("Escaped label",
"@string/escaped_label", "abc"); "@string/escaped_label", "a\\bc");
assertTextArray("Escaped label at beginning", assertTextArray("Escaped label at beginning",
"@string/escaped_label_at_beginning", "abc"); "@string/escaped_label_at_beginning", "\\abc");
assertTextArray("Escaped label at end",
"@string/escaped_label_at_end", "abc\\");
assertTextArray("Escaped label with comma", assertTextArray("Escaped label with comma",
"@string/escaped_label_with_comma", "a,c"); "@string/escaped_label_with_comma", "a\\,c");
assertTextArray("Escaped label with comma at beginning", assertTextArray("Escaped label with comma at beginning",
"@string/escaped_label_with_comma_at_beginning", ",bc"); "@string/escaped_label_with_comma_at_beginning", "\\,bc");
assertTextArray("Escaped label with comma at end",
"@string/escaped_label_with_comma_at_end", "ab\\,");
assertTextArray("Escaped label with successive", assertTextArray("Escaped label with successive",
"@string/escaped_label_with_successive", ",\\bc"); "@string/escaped_label_with_successive", "\\,\\\\bc");
assertTextArray("Escaped label with escape", assertTextArray("Escaped label with escape",
"@string/escaped_label_with_escape", "a\\c"); "@string/escaped_label_with_escape", "a\\\\c");
} }
public void testParseCsvResourceMulti() { public void testParseCsvResourceMulti() {
@ -220,36 +243,41 @@ public class CsvParserTests extends AndroidTestCase {
public void testParseCsvResourcetMultiEscaped() { public void testParseCsvResourcetMultiEscaped() {
assertTextArray("Multiple chars with comma", assertTextArray("Multiple chars with comma",
"@string/multiple_chars_with_comma", "@string/multiple_chars_with_comma",
"a", ",", "c"); "a", "\\,", "c");
assertTextArray("Multiple chars with comma surrounded by spaces", assertTextArray("Multiple chars with comma surrounded by spaces",
"@string/multiple_chars_with_comma_surrounded_by_spaces", "@string/multiple_chars_with_comma_surrounded_by_spaces",
" a ", " , ", " c "); " a ", " \\, ", " c ");
assertTextArray("Multiple labels with escape", assertTextArray("Multiple labels with escape",
"@string/multiple_labels_with_escape", "@string/multiple_labels_with_escape",
"abc", "def", "ghi"); "\\abc", "d\\ef", "gh\\i");
assertTextArray("Multiple labels with escape surrounded by spaces", assertTextArray("Multiple labels with escape surrounded by spaces",
"@string/multiple_labels_with_escape_surrounded_by_spaces", "@string/multiple_labels_with_escape_surrounded_by_spaces",
" abc ", " def ", " ghi "); " \\abc ", " d\\ef ", " gh\\i ");
assertTextArray("Multiple labels with comma and escape", assertTextArray("Multiple labels with comma and escape",
"@string/multiple_labels_with_comma_and_escape", "@string/multiple_labels_with_comma_and_escape",
"ab\\", "d\\,", "g,i"); "ab\\\\", "d\\\\\\,", "g\\,i");
assertTextArray("Multiple labels with comma and escape surrounded by spaces", assertTextArray("Multiple labels with comma and escape surrounded by spaces",
"@string/multiple_labels_with_comma_and_escape_surrounded_by_spaces", "@string/multiple_labels_with_comma_and_escape_surrounded_by_spaces",
" ab\\ ", " d\\, ", " g,i "); " ab\\\\ ", " d\\\\\\, ", " g\\,i ");
} }
public void testParseMultipleResources() { public void testParseMultipleResources() {
assertTextArray("Literals and resources", assertTextArray("Literals and resources",
"1,@string/multiple_chars,z", "1", "a", "b", "c", "z"); "1,@string/multiple_chars,z", "1", "a", "b", "c", "z");
assertTextArray("Literals and resources and escape at end",
"\\1,@string/multiple_chars,z\\", "\\1", "a", "b", "c", "z\\");
assertTextArray("Multiple single resource chars and labels", assertTextArray("Multiple single resource chars and labels",
"@string/single_char,@string/single_label,@string/escaped_comma", "@string/single_char,@string/single_label,@string/escaped_comma",
"a", "abc", ","); "a", "abc", "\\,");
assertTextArray("Multiple single resource chars and labels 2",
"@string/single_char,@string/single_label,@string/escaped_comma_escape",
"a", "abc", "a\\,\\");
assertTextArray("Multiple multiple resource chars and labels", assertTextArray("Multiple multiple resource chars and labels",
"@string/multiple_chars,@string/multiple_labels,@string/multiple_chars_with_comma", "@string/multiple_chars,@string/multiple_labels,@string/multiple_chars_with_comma",
"a", "b", "c", "abc", "def", "ghi", "a", ",", "c"); "a", "b", "c", "abc", "def", "ghi", "a", "\\,", "c");
assertTextArray("Concatenated resources", assertTextArray("Concatenated resources",
"@string/multiple_chars@string/multiple_labels@string/multiple_chars_with_comma", "@string/multiple_chars@string/multiple_labels@string/multiple_chars_with_comma",
"a", "b", "cabc", "def", "ghia", ",", "c"); "a", "b", "cabc", "def", "ghia", "\\,", "c");
assertTextArray("Concatenated resource and literal", assertTextArray("Concatenated resource and literal",
"abc@string/multiple_labels", "abc@string/multiple_labels",
"abcabc", "def", "ghi"); "abcabc", "def", "ghi");