Parse escape sequences strictly in CSV parser

This change alters the CSV parser behavior. The parser now only resolves
string resource references and leaves everything else untouched:
  * A string resource reference is resolved. ["@string/res" -> "<content_of_res>"]
  * Any other occurrence of an escape sequence is left intact. ["\x" -> "\x"]

Before this change, an escape sequence in a moreKeys string was parsed
three times: first when parsing the string resource, next in the CSV
parser, and finally in KeySpecParser. As a result, representing a single
escape character was cumbersome: "\\\\\\\\".

Now a single escape character can be represented in a string resource as "\\\\".

Change-Id: Ib978e17b779cc82585eed8241ac3857508b14bc7
main
Tadashi G. Takaoka 2012-02-02 17:25:07 +09:00
parent a456c755ee
commit 5852a2594f
4 changed files with 90 additions and 91 deletions

View File

@ -54,7 +54,7 @@
<string name="more_keys_for_currency_euro">¢,£,$,¥,₱</string>
<string name="more_keys_for_currency_pound">¢,$,€,¥,₱</string>
<string name="more_keys_for_currency_general">¢,$,€,£,¥,₱</string>
<string name="more_keys_for_smiley">":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\\\\\|:-\\\\\\\\ ,:\'(|:\'( ,:-D|:-D "</string>
<string name="more_keys_for_smiley">":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\|:-\\\\ ,:\'(|:\'( ,:-D|:-D "</string>
<string name="more_keys_for_punctuation">"\\,,\?,!,:,-,\',\",(,),/,;,+,&amp;,\@"</string>
<integer name="mini_keyboard_column_for_punctuation">7</integer>
<string name="keyhintlabel_for_punctuation"></string>

View File

@ -48,7 +48,7 @@ public class KeySpecParser {
private static final char ESCAPE_CHAR = '\\';
private static final char PREFIX_AT = '@';
private static final char SUFFIX_SLASH = '/';
private static final String PREFIX_STRING = PREFIX_AT + "string";
private static final String PREFIX_STRING = PREFIX_AT + "string" + SUFFIX_SLASH;
private static final char LABEL_END = '|';
private static final String PREFIX_ICON = PREFIX_AT + "icon" + SUFFIX_SLASH;
private static final String PREFIX_CODE = PREFIX_AT + "integer" + SUFFIX_SLASH;
@ -293,13 +293,11 @@ public class KeySpecParser {
sb.append(res.getString(resId));
pos = end - 1;
} else if (c == ESCAPE_CHAR) {
pos++;
if (sb != null) {
sb.append(c);
if (pos < size) {
sb.append(text.charAt(pos));
}
// Append both escape character and escaped character.
sb.append(text.substring(pos, Math.min(pos + 2, size)));
}
pos++;
} else if (sb != null) {
sb.append(c);
}
@ -309,10 +307,7 @@ public class KeySpecParser {
private static int searchResourceNameEnd(String text, int start) {
final int size = text.length();
if (start >= size || text.charAt(start) != SUFFIX_SLASH) {
throw new RuntimeException("Resource name not specified");
}
for (int pos = start + 1; pos < size; pos++) {
for (int pos = start; pos < size; pos++) {
final char c = text.charAt(pos);
// String resource name should be consisted of [a-z_0-9].
if ((c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9')) {
@ -333,7 +328,6 @@ public class KeySpecParser {
return new String[] { text };
}
final StringBuilder sb = new StringBuilder();
ArrayList<String> list = null;
int start = 0;
for (int pos = 0; pos < size; pos++) {
@ -342,44 +336,18 @@ public class KeySpecParser {
if (list == null) {
list = new ArrayList<String>();
}
if (sb.length() == 0) {
list.add(text.substring(start, pos));
} else {
list.add(sb.toString());
sb.setLength(0);
}
list.add(text.substring(start, pos));
// Skip comma
start = pos + 1;
continue;
}
// TODO: Only parse escaped comma. Other escaped character should be passed through
// with escaped character prefixed.
// Skip escaped sequence.
if (c == ESCAPE_CHAR) {
if (start == pos) {
// Skip escaping comma at the beginning of the text.
start++;
pos++;
} else {
if (start < pos && sb.length() == 0) {
sb.append(text.substring(start, pos));
}
// Skip comma
pos++;
if (pos < size) {
sb.append(text.charAt(pos));
}
}
} else if (sb.length() > 0) {
sb.append(c);
} else if (c == ESCAPE_CHAR) {
// Skip escape character and escaped character.
pos++;
}
}
if (list == null) {
return new String[] {
sb.length() > 0 ? sb.toString() : text.substring(start)
};
return new String[] { text.substring(start) };
} else {
list.add(sb.length() > 0 ? sb.toString() : text.substring(start));
list.add(text.substring(start));
return list.toArray(new String[list.size()]);
}
}

View File

@ -30,11 +30,14 @@
<string name="label_surrounded_by_spaces">" abc "</string>
<string name="escaped_char">"\\a"</string>
<string name="escaped_comma">"\\,"</string>
<string name="escaped_comma_escape">"a\\,\\"</string>
<string name="escaped_escape">"\\\\"</string>
<string name="escaped_label">"a\\bc"</string>
<string name="escaped_label_at_beginning">"\\abc"</string>
<string name="escaped_label_at_end">"abc\\"</string>
<string name="escaped_label_with_comma">"a\\,c"</string>
<string name="escaped_label_with_comma_at_beginning">"\\,bc"</string>
<string name="escaped_label_with_comma_at_end">"ab\\,"</string>
<string name="escaped_label_with_successive">"\\,\\\\bc"</string>
<string name="escaped_label_with_escape">"a\\\\c"</string>
<string name="multiple_chars">"a,b,c"</string>

View File

@ -24,7 +24,7 @@ import com.android.inputmethod.latin.tests.R;
import java.util.Arrays;
public class CsvParserTests extends AndroidTestCase {
public class KeySpecParserCsvTests extends AndroidTestCase {
private Resources mTestResources;
@Override
@ -79,6 +79,7 @@ public class CsvParserTests extends AndroidTestCase {
public void testParseCsvTextSingle() {
assertTextArray("Single char", "a", "a");
assertTextArray("Surrogate pair", PAIR1, PAIR1);
assertTextArray("Single escape", "\\", "\\");
assertTextArray("Space", " ", " ");
assertTextArray("Single label", "abc", "abc");
assertTextArray("Single srrogate pairs label", SURROGATE2, SURROGATE2);
@ -98,36 +99,49 @@ public class CsvParserTests extends AndroidTestCase {
"ab" + SURROGATE1 + "cd");
assertTextArray("Incomplete resource reference 1", "string", "string");
assertTextArray("Incomplete resource reference 2", "@strin", "@strin");
assertTextArray("Incomplete resource reference 3", "@" + SURROGATE2, "@" + SURROGATE2);
assertTextArray("Incomplete resource reference 2", "@string", "@string");
assertTextArray("Incomplete resource reference 3", "string/", "string/");
assertTextArray("Incomplete resource reference 4", "@" + SURROGATE2, "@" + SURROGATE2);
}
public void testParseCsvTextSingleEscaped() {
assertTextArray("Escaped char", "\\a", "a");
assertTextArray("Escaped surrogate pair", "\\" + PAIR1, PAIR1);
assertTextArray("Escaped comma", "\\,", ",");
assertTextArray("Escaped escape", "\\\\", "\\");
assertTextArray("Escaped label", "a\\bc", "abc");
assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a" + PAIR1 + "c");
assertTextArray("Escaped label at beginning", "\\abc", "abc");
assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, SURROGATE2);
assertTextArray("Escaped label with comma", "a\\,c", "a,c");
assertTextArray("Escaped surrogate with comma", PAIR1 + "\\," + PAIR2, PAIR1 + "," + PAIR2);
assertTextArray("Escaped label with comma at beginning", "\\,bc", ",bc");
assertTextArray("Escaped char", "\\a", "\\a");
assertTextArray("Escaped surrogate pair", "\\" + PAIR1, "\\" + PAIR1);
assertTextArray("Escaped comma", "\\,", "\\,");
assertTextArray("Escaped comma escape", "a\\,\\", "a\\,\\");
assertTextArray("Escaped escape", "\\\\", "\\\\");
assertTextArray("Escaped label", "a\\bc", "a\\bc");
assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a\\" + PAIR1 + "c");
assertTextArray("Escaped label at beginning", "\\abc", "\\abc");
assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, "\\" + SURROGATE2);
assertTextArray("Escaped label at end", "abc\\", "abc\\");
assertTextArray("Escaped surrogate at end", SURROGATE2 + "\\", SURROGATE2 + "\\");
assertTextArray("Escaped label with comma", "a\\,c", "a\\,c");
assertTextArray("Escaped surrogate with comma",
PAIR1 + "\\," + PAIR2, PAIR1 + "\\," + PAIR2);
assertTextArray("Escaped label with comma at beginning", "\\,bc", "\\,bc");
assertTextArray("Escaped surrogate with comma at beginning",
"\\," + SURROGATE1, "," + SURROGATE1);
assertTextArray("Escaped label with successive", "\\,\\\\bc", ",\\bc");
"\\," + SURROGATE1, "\\," + SURROGATE1);
assertTextArray("Escaped label with comma at end", "ab\\,", "ab\\,");
assertTextArray("Escaped surrogate with comma at end",
SURROGATE2 + "\\,", SURROGATE2 + "\\,");
assertTextArray("Escaped label with successive", "\\,\\\\bc", "\\,\\\\bc");
assertTextArray("Escaped surrogate with successive",
"\\,\\\\" + SURROGATE1, ",\\" + SURROGATE1);
assertTextArray("Escaped label with escape", "a\\\\c", "a\\c");
"\\,\\\\" + SURROGATE1, "\\,\\\\" + SURROGATE1);
assertTextArray("Escaped label with escape", "a\\\\c", "a\\\\c");
assertTextArray("Escaped surrogate with escape",
PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\" + PAIR2);
PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\\\" + PAIR2);
assertTextArray("Escaped @string", "\\@string/empty_string", "@string/empty_string");
assertTextArray("Escaped @string", "\\@string", "\\@string");
assertTextArray("Escaped @string/", "\\@string/", "\\@string/");
assertTextArray("Escaped @string/", "\\@string/empty_string", "\\@string/empty_string");
}
public void testParseCsvTextMulti() {
assertTextArray("Multiple chars", "a,b,c", "a", "b", "c");
assertTextArray("Multiple chars", "a,b,\\c", "a", "b", "\\c");
assertTextArray("Multiple chars and escape at beginning and end",
"\\a,b,\\c\\", "\\a", "b", "\\c\\");
assertTextArray("Multiple surrogates", PAIR1 + "," + PAIR2 + "," + PAIR3,
PAIR1, PAIR2, PAIR3);
assertTextArray("Multiple chars surrounded by spaces", " a , b , c ", " a ", " b ", " c ");
@ -139,24 +153,24 @@ public class CsvParserTests extends AndroidTestCase {
}
public void testParseCsvTextMultiEscaped() {
assertTextArray("Multiple chars with comma", "a,\\,,c", "a", ",", "c");
assertTextArray("Multiple chars with comma", "a,\\,,c", "a", "\\,", "c");
assertTextArray("Multiple chars with comma surrounded by spaces", " a , \\, , c ",
" a ", " , ", " c ");
assertTextArray("Multiple labels with escape", "\\abc,d\\ef,gh\\i", "abc", "def", "ghi");
" a ", " \\, ", " c ");
assertTextArray("Multiple labels with escape",
"\\abc,d\\ef,gh\\i", "\\abc", "d\\ef", "gh\\i");
assertTextArray("Multiple labels with escape surrounded by spaces",
" \\abc , d\\ef , gh\\i ", " abc ", " def ", " ghi ");
" \\abc , d\\ef , gh\\i ", " \\abc ", " d\\ef ", " gh\\i ");
assertTextArray("Multiple labels with comma and escape",
"ab\\\\,d\\\\\\,,g\\,i", "ab\\", "d\\,", "g,i");
"ab\\\\,d\\\\\\,,g\\,i", "ab\\\\", "d\\\\\\,", "g\\,i");
assertTextArray("Multiple labels with comma and escape surrounded by spaces",
" ab\\\\ , d\\\\\\, , g\\,i ", " ab\\ ", " d\\, ", " g,i ");
" ab\\\\ , d\\\\\\, , g\\,i ", " ab\\\\ ", " d\\\\\\, ", " g\\,i ");
assertTextArray("Multiple escaped @string", "\\@,\\@string/empty_string",
"@", "@string/empty_string");
"\\@", "\\@string/empty_string");
}
public void testParseCsvResourceError() {
assertError("Incomplete resource name 1", "@string", "@string");
assertError("Incomplete resource name 2", "@string/", "@string/");
assertError("Incomplete resource name", "@string/", "@string/");
assertError("Non existing resource", "@string/non_existing");
}
@ -182,27 +196,36 @@ public class CsvParserTests extends AndroidTestCase {
"@string/spaces_at_end_of_label", "abc ");
assertTextArray("label surrounded by spaces",
"@string/label_surrounded_by_spaces", " abc ");
assertTextArray("Escape and single char",
"\\\\@string/single_char", "\\\\a");
}
public void testParseCsvResourceSingleEscaped() {
assertTextArray("Escaped char",
"@string/escaped_char", "a");
"@string/escaped_char", "\\a");
assertTextArray("Escaped comma",
"@string/escaped_comma", ",");
"@string/escaped_comma", "\\,");
assertTextArray("Escaped comma escape",
"@string/escaped_comma_escape", "a\\,\\");
assertTextArray("Escaped escape",
"@string/escaped_escape", "\\");
"@string/escaped_escape", "\\\\");
assertTextArray("Escaped label",
"@string/escaped_label", "abc");
"@string/escaped_label", "a\\bc");
assertTextArray("Escaped label at beginning",
"@string/escaped_label_at_beginning", "abc");
"@string/escaped_label_at_beginning", "\\abc");
assertTextArray("Escaped label at end",
"@string/escaped_label_at_end", "abc\\");
assertTextArray("Escaped label with comma",
"@string/escaped_label_with_comma", "a,c");
"@string/escaped_label_with_comma", "a\\,c");
assertTextArray("Escaped label with comma at beginning",
"@string/escaped_label_with_comma_at_beginning", ",bc");
"@string/escaped_label_with_comma_at_beginning", "\\,bc");
assertTextArray("Escaped label with comma at end",
"@string/escaped_label_with_comma_at_end", "ab\\,");
assertTextArray("Escaped label with successive",
"@string/escaped_label_with_successive", ",\\bc");
"@string/escaped_label_with_successive", "\\,\\\\bc");
assertTextArray("Escaped label with escape",
"@string/escaped_label_with_escape", "a\\c");
"@string/escaped_label_with_escape", "a\\\\c");
}
public void testParseCsvResourceMulti() {
@ -220,36 +243,41 @@ public class CsvParserTests extends AndroidTestCase {
public void testParseCsvResourcetMultiEscaped() {
assertTextArray("Multiple chars with comma",
"@string/multiple_chars_with_comma",
"a", ",", "c");
"a", "\\,", "c");
assertTextArray("Multiple chars with comma surrounded by spaces",
"@string/multiple_chars_with_comma_surrounded_by_spaces",
" a ", " , ", " c ");
" a ", " \\, ", " c ");
assertTextArray("Multiple labels with escape",
"@string/multiple_labels_with_escape",
"abc", "def", "ghi");
"\\abc", "d\\ef", "gh\\i");
assertTextArray("Multiple labels with escape surrounded by spaces",
"@string/multiple_labels_with_escape_surrounded_by_spaces",
" abc ", " def ", " ghi ");
" \\abc ", " d\\ef ", " gh\\i ");
assertTextArray("Multiple labels with comma and escape",
"@string/multiple_labels_with_comma_and_escape",
"ab\\", "d\\,", "g,i");
"ab\\\\", "d\\\\\\,", "g\\,i");
assertTextArray("Multiple labels with comma and escape surrounded by spaces",
"@string/multiple_labels_with_comma_and_escape_surrounded_by_spaces",
" ab\\ ", " d\\, ", " g,i ");
" ab\\\\ ", " d\\\\\\, ", " g\\,i ");
}
public void testParseMultipleResources() {
assertTextArray("Literals and resources",
"1,@string/multiple_chars,z", "1", "a", "b", "c", "z");
assertTextArray("Literals and resources and escape at end",
"\\1,@string/multiple_chars,z\\", "\\1", "a", "b", "c", "z\\");
assertTextArray("Multiple single resource chars and labels",
"@string/single_char,@string/single_label,@string/escaped_comma",
"a", "abc", ",");
"a", "abc", "\\,");
assertTextArray("Multiple single resource chars and labels 2",
"@string/single_char,@string/single_label,@string/escaped_comma_escape",
"a", "abc", "a\\,\\");
assertTextArray("Multiple multiple resource chars and labels",
"@string/multiple_chars,@string/multiple_labels,@string/multiple_chars_with_comma",
"a", "b", "c", "abc", "def", "ghi", "a", ",", "c");
"a", "b", "c", "abc", "def", "ghi", "a", "\\,", "c");
assertTextArray("Concatenated resources",
"@string/multiple_chars@string/multiple_labels@string/multiple_chars_with_comma",
"a", "b", "cabc", "def", "ghia", ",", "c");
"a", "b", "cabc", "def", "ghia", "\\,", "c");
assertTextArray("Concatenated resource and literal",
"abc@string/multiple_labels",
"abcabc", "def", "ghi");