From 5852a2594f4cce518e0b18069c895c2f8561d093 Mon Sep 17 00:00:00 2001 From: "Tadashi G. Takaoka" Date: Thu, 2 Feb 2012 17:25:07 +0900 Subject: [PATCH] Parse escaped sequence strictly in CSV parser This change alters the CSV parser behavior. The parser now only resolves * String resource references. ["@string/res" -> "<content_of_res>"] * Any other occurrence of an escape sequence is left intact. ["\x" -> "\x"] Before this change, an escape sequence in a moreKeys string was parsed three times: first when parsing the string resource, next in the CSV parser, and finally in KeySpecParser. As a result, representing a single escape character itself was a bit annoying: "\\\\\\\\". Now we can represent a single escape character itself in a string resource by "\\\\". Change-Id: Ib978e17b779cc82585eed8241ac3857508b14bc7 --- java/res/values/donottranslate-more-keys.xml | 2 +- .../keyboard/internal/KeySpecParser.java | 54 ++------ tests/res/values/donottranslate.xml | 3 + ...rTests.java => KeySpecParserCsvTests.java} | 122 +++++++++++------- 4 files changed, 90 insertions(+), 91 deletions(-) rename tests/src/com/android/inputmethod/keyboard/internal/{CsvParserTests.java => KeySpecParserCsvTests.java} (70%) diff --git a/java/res/values/donottranslate-more-keys.xml b/java/res/values/donottranslate-more-keys.xml index 07e711a02..d7b1ff5e7 100644 --- a/java/res/values/donottranslate-more-keys.xml +++ b/java/res/values/donottranslate-more-keys.xml @@ -54,7 +54,7 @@ ¢,£,$,¥,₱ ¢,$,€,¥,₱ ¢,$,€,£,¥,₱ - ":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! ,:-[|:-[ ,O:-)|O:-) ,:-\\\\\\\\|:-\\\\\\\\ ,:\'(|:\'( ,:-D|:-D " + ":-)|:-) ,:-(|:-( ,;-)|;-) ,:-P|:-P ,=-O|=-O ,:-*|:-* ,:O|:O ,B-)|B-) ,:-$|:-$ ,:-!|:-! 
,:-[|:-[ ,O:-)|O:-) ,:-\\\\|:-\\\\ ,:\'(|:\'( ,:-D|:-D " "\\,,\?,!,:,-,\',\",(,),/,;,+,&,\@" 7 diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeySpecParser.java b/java/src/com/android/inputmethod/keyboard/internal/KeySpecParser.java index ba12676ad..a84b469ea 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeySpecParser.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeySpecParser.java @@ -48,7 +48,7 @@ public class KeySpecParser { private static final char ESCAPE_CHAR = '\\'; private static final char PREFIX_AT = '@'; private static final char SUFFIX_SLASH = '/'; - private static final String PREFIX_STRING = PREFIX_AT + "string"; + private static final String PREFIX_STRING = PREFIX_AT + "string" + SUFFIX_SLASH; private static final char LABEL_END = '|'; private static final String PREFIX_ICON = PREFIX_AT + "icon" + SUFFIX_SLASH; private static final String PREFIX_CODE = PREFIX_AT + "integer" + SUFFIX_SLASH; @@ -293,13 +293,11 @@ public class KeySpecParser { sb.append(res.getString(resId)); pos = end - 1; } else if (c == ESCAPE_CHAR) { - pos++; if (sb != null) { - sb.append(c); - if (pos < size) { - sb.append(text.charAt(pos)); - } + // Append both escape character and escaped character. + sb.append(text.substring(pos, Math.min(pos + 2, size))); } + pos++; } else if (sb != null) { sb.append(c); } @@ -309,10 +307,7 @@ public class KeySpecParser { private static int searchResourceNameEnd(String text, int start) { final int size = text.length(); - if (start >= size || text.charAt(start) != SUFFIX_SLASH) { - throw new RuntimeException("Resource name not specified"); - } - for (int pos = start + 1; pos < size; pos++) { + for (int pos = start; pos < size; pos++) { final char c = text.charAt(pos); // String resource name should be consisted of [a-z_0-9]. 
if ((c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9')) { @@ -333,7 +328,6 @@ public class KeySpecParser { return new String[] { text }; } - final StringBuilder sb = new StringBuilder(); ArrayList list = null; int start = 0; for (int pos = 0; pos < size; pos++) { @@ -342,44 +336,18 @@ public class KeySpecParser { if (list == null) { list = new ArrayList(); } - if (sb.length() == 0) { - list.add(text.substring(start, pos)); - } else { - list.add(sb.toString()); - sb.setLength(0); - } + list.add(text.substring(start, pos)); // Skip comma start = pos + 1; - continue; - } - // TODO: Only parse escaped comma. Other escaped character should be passed through - // with escaped character prefixed. - // Skip escaped sequence. - if (c == ESCAPE_CHAR) { - if (start == pos) { - // Skip escaping comma at the beginning of the text. - start++; - pos++; - } else { - if (start < pos && sb.length() == 0) { - sb.append(text.substring(start, pos)); - } - // Skip comma - pos++; - if (pos < size) { - sb.append(text.charAt(pos)); - } - } - } else if (sb.length() > 0) { - sb.append(c); + } else if (c == ESCAPE_CHAR) { + // Skip escape character and escaped character. + pos++; } } if (list == null) { - return new String[] { - sb.length() > 0 ? sb.toString() : text.substring(start) - }; + return new String[] { text.substring(start) }; } else { - list.add(sb.length() > 0 ? 
sb.toString() : text.substring(start)); + list.add(text.substring(start)); return list.toArray(new String[list.size()]); } } diff --git a/tests/res/values/donottranslate.xml b/tests/res/values/donottranslate.xml index bfd1c1716..d0cde71a5 100644 --- a/tests/res/values/donottranslate.xml +++ b/tests/res/values/donottranslate.xml @@ -30,11 +30,14 @@ " abc " "\\a" "\\," + "a\\,\\" "\\\\" "a\\bc" "\\abc" + "abc\\" "a\\,c" "\\,bc" + "ab\\," "\\,\\\\bc" "a\\\\c" "a,b,c" diff --git a/tests/src/com/android/inputmethod/keyboard/internal/CsvParserTests.java b/tests/src/com/android/inputmethod/keyboard/internal/KeySpecParserCsvTests.java similarity index 70% rename from tests/src/com/android/inputmethod/keyboard/internal/CsvParserTests.java rename to tests/src/com/android/inputmethod/keyboard/internal/KeySpecParserCsvTests.java index ef80d4f0c..721c801e1 100644 --- a/tests/src/com/android/inputmethod/keyboard/internal/CsvParserTests.java +++ b/tests/src/com/android/inputmethod/keyboard/internal/KeySpecParserCsvTests.java @@ -24,7 +24,7 @@ import com.android.inputmethod.latin.tests.R; import java.util.Arrays; -public class CsvParserTests extends AndroidTestCase { +public class KeySpecParserCsvTests extends AndroidTestCase { private Resources mTestResources; @Override @@ -79,6 +79,7 @@ public class CsvParserTests extends AndroidTestCase { public void testParseCsvTextSingle() { assertTextArray("Single char", "a", "a"); assertTextArray("Surrogate pair", PAIR1, PAIR1); + assertTextArray("Single escape", "\\", "\\"); assertTextArray("Space", " ", " "); assertTextArray("Single label", "abc", "abc"); assertTextArray("Single srrogate pairs label", SURROGATE2, SURROGATE2); @@ -98,36 +99,49 @@ public class CsvParserTests extends AndroidTestCase { "ab" + SURROGATE1 + "cd"); assertTextArray("Incomplete resource reference 1", "string", "string"); - assertTextArray("Incomplete resource reference 2", "@strin", "@strin"); - assertTextArray("Incomplete resource reference 3", "@" + SURROGATE2, 
"@" + SURROGATE2); + assertTextArray("Incomplete resource reference 2", "@string", "@string"); + assertTextArray("Incomplete resource reference 3", "string/", "string/"); + assertTextArray("Incomplete resource reference 4", "@" + SURROGATE2, "@" + SURROGATE2); } public void testParseCsvTextSingleEscaped() { - assertTextArray("Escaped char", "\\a", "a"); - assertTextArray("Escaped surrogate pair", "\\" + PAIR1, PAIR1); - assertTextArray("Escaped comma", "\\,", ","); - assertTextArray("Escaped escape", "\\\\", "\\"); - assertTextArray("Escaped label", "a\\bc", "abc"); - assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a" + PAIR1 + "c"); - assertTextArray("Escaped label at beginning", "\\abc", "abc"); - assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, SURROGATE2); - assertTextArray("Escaped label with comma", "a\\,c", "a,c"); - assertTextArray("Escaped surrogate with comma", PAIR1 + "\\," + PAIR2, PAIR1 + "," + PAIR2); - assertTextArray("Escaped label with comma at beginning", "\\,bc", ",bc"); + assertTextArray("Escaped char", "\\a", "\\a"); + assertTextArray("Escaped surrogate pair", "\\" + PAIR1, "\\" + PAIR1); + assertTextArray("Escaped comma", "\\,", "\\,"); + assertTextArray("Escaped comma escape", "a\\,\\", "a\\,\\"); + assertTextArray("Escaped escape", "\\\\", "\\\\"); + assertTextArray("Escaped label", "a\\bc", "a\\bc"); + assertTextArray("Escaped surrogate", "a\\" + PAIR1 + "c", "a\\" + PAIR1 + "c"); + assertTextArray("Escaped label at beginning", "\\abc", "\\abc"); + assertTextArray("Escaped surrogate at beginning", "\\" + SURROGATE2, "\\" + SURROGATE2); + assertTextArray("Escaped label at end", "abc\\", "abc\\"); + assertTextArray("Escaped surrogate at end", SURROGATE2 + "\\", SURROGATE2 + "\\"); + assertTextArray("Escaped label with comma", "a\\,c", "a\\,c"); + assertTextArray("Escaped surrogate with comma", + PAIR1 + "\\," + PAIR2, PAIR1 + "\\," + PAIR2); + assertTextArray("Escaped label with comma at beginning", "\\,bc", 
"\\,bc"); assertTextArray("Escaped surrogate with comma at beginning", - "\\," + SURROGATE1, "," + SURROGATE1); - assertTextArray("Escaped label with successive", "\\,\\\\bc", ",\\bc"); + "\\," + SURROGATE1, "\\," + SURROGATE1); + assertTextArray("Escaped label with comma at end", "ab\\,", "ab\\,"); + assertTextArray("Escaped surrogate with comma at end", + SURROGATE2 + "\\,", SURROGATE2 + "\\,"); + assertTextArray("Escaped label with successive", "\\,\\\\bc", "\\,\\\\bc"); assertTextArray("Escaped surrogate with successive", - "\\,\\\\" + SURROGATE1, ",\\" + SURROGATE1); - assertTextArray("Escaped label with escape", "a\\\\c", "a\\c"); + "\\,\\\\" + SURROGATE1, "\\,\\\\" + SURROGATE1); + assertTextArray("Escaped label with escape", "a\\\\c", "a\\\\c"); assertTextArray("Escaped surrogate with escape", - PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\" + PAIR2); + PAIR1 + "\\\\" + PAIR2, PAIR1 + "\\\\" + PAIR2); - assertTextArray("Escaped @string", "\\@string/empty_string", "@string/empty_string"); + assertTextArray("Escaped @string", "\\@string", "\\@string"); + assertTextArray("Escaped @string/", "\\@string/", "\\@string/"); + assertTextArray("Escaped @string/", "\\@string/empty_string", "\\@string/empty_string"); } public void testParseCsvTextMulti() { assertTextArray("Multiple chars", "a,b,c", "a", "b", "c"); + assertTextArray("Multiple chars", "a,b,\\c", "a", "b", "\\c"); + assertTextArray("Multiple chars and escape at beginning and end", + "\\a,b,\\c\\", "\\a", "b", "\\c\\"); assertTextArray("Multiple surrogates", PAIR1 + "," + PAIR2 + "," + PAIR3, PAIR1, PAIR2, PAIR3); assertTextArray("Multiple chars surrounded by spaces", " a , b , c ", " a ", " b ", " c "); @@ -139,24 +153,24 @@ public class CsvParserTests extends AndroidTestCase { } public void testParseCsvTextMultiEscaped() { - assertTextArray("Multiple chars with comma", "a,\\,,c", "a", ",", "c"); + assertTextArray("Multiple chars with comma", "a,\\,,c", "a", "\\,", "c"); assertTextArray("Multiple chars with comma 
surrounded by spaces", " a , \\, , c ", - " a ", " , ", " c "); - assertTextArray("Multiple labels with escape", "\\abc,d\\ef,gh\\i", "abc", "def", "ghi"); + " a ", " \\, ", " c "); + assertTextArray("Multiple labels with escape", + "\\abc,d\\ef,gh\\i", "\\abc", "d\\ef", "gh\\i"); assertTextArray("Multiple labels with escape surrounded by spaces", - " \\abc , d\\ef , gh\\i ", " abc ", " def ", " ghi "); + " \\abc , d\\ef , gh\\i ", " \\abc ", " d\\ef ", " gh\\i "); assertTextArray("Multiple labels with comma and escape", - "ab\\\\,d\\\\\\,,g\\,i", "ab\\", "d\\,", "g,i"); + "ab\\\\,d\\\\\\,,g\\,i", "ab\\\\", "d\\\\\\,", "g\\,i"); assertTextArray("Multiple labels with comma and escape surrounded by spaces", - " ab\\\\ , d\\\\\\, , g\\,i ", " ab\\ ", " d\\, ", " g,i "); + " ab\\\\ , d\\\\\\, , g\\,i ", " ab\\\\ ", " d\\\\\\, ", " g\\,i "); assertTextArray("Multiple escaped @string", "\\@,\\@string/empty_string", - "@", "@string/empty_string"); + "\\@", "\\@string/empty_string"); } public void testParseCsvResourceError() { - assertError("Incomplete resource name 1", "@string", "@string"); - assertError("Incomplete resource name 2", "@string/", "@string/"); + assertError("Incomplete resource name", "@string/", "@string/"); assertError("Non existing resource", "@string/non_existing"); } @@ -182,27 +196,36 @@ public class CsvParserTests extends AndroidTestCase { "@string/spaces_at_end_of_label", "abc "); assertTextArray("label surrounded by spaces", "@string/label_surrounded_by_spaces", " abc "); + + assertTextArray("Escape and single char", + "\\\\@string/single_char", "\\\\a"); } public void testParseCsvResourceSingleEscaped() { assertTextArray("Escaped char", - "@string/escaped_char", "a"); + "@string/escaped_char", "\\a"); assertTextArray("Escaped comma", - "@string/escaped_comma", ","); + "@string/escaped_comma", "\\,"); + assertTextArray("Escaped comma escape", + "@string/escaped_comma_escape", "a\\,\\"); assertTextArray("Escaped escape", - "@string/escaped_escape", 
"\\"); + "@string/escaped_escape", "\\\\"); assertTextArray("Escaped label", - "@string/escaped_label", "abc"); + "@string/escaped_label", "a\\bc"); assertTextArray("Escaped label at beginning", - "@string/escaped_label_at_beginning", "abc"); + "@string/escaped_label_at_beginning", "\\abc"); + assertTextArray("Escaped label at end", + "@string/escaped_label_at_end", "abc\\"); assertTextArray("Escaped label with comma", - "@string/escaped_label_with_comma", "a,c"); + "@string/escaped_label_with_comma", "a\\,c"); assertTextArray("Escaped label with comma at beginning", - "@string/escaped_label_with_comma_at_beginning", ",bc"); + "@string/escaped_label_with_comma_at_beginning", "\\,bc"); + assertTextArray("Escaped label with comma at end", + "@string/escaped_label_with_comma_at_end", "ab\\,"); assertTextArray("Escaped label with successive", - "@string/escaped_label_with_successive", ",\\bc"); + "@string/escaped_label_with_successive", "\\,\\\\bc"); assertTextArray("Escaped label with escape", - "@string/escaped_label_with_escape", "a\\c"); + "@string/escaped_label_with_escape", "a\\\\c"); } public void testParseCsvResourceMulti() { @@ -220,36 +243,41 @@ public class CsvParserTests extends AndroidTestCase { public void testParseCsvResourcetMultiEscaped() { assertTextArray("Multiple chars with comma", "@string/multiple_chars_with_comma", - "a", ",", "c"); + "a", "\\,", "c"); assertTextArray("Multiple chars with comma surrounded by spaces", "@string/multiple_chars_with_comma_surrounded_by_spaces", - " a ", " , ", " c "); + " a ", " \\, ", " c "); assertTextArray("Multiple labels with escape", "@string/multiple_labels_with_escape", - "abc", "def", "ghi"); + "\\abc", "d\\ef", "gh\\i"); assertTextArray("Multiple labels with escape surrounded by spaces", "@string/multiple_labels_with_escape_surrounded_by_spaces", - " abc ", " def ", " ghi "); + " \\abc ", " d\\ef ", " gh\\i "); assertTextArray("Multiple labels with comma and escape", 
"@string/multiple_labels_with_comma_and_escape", - "ab\\", "d\\,", "g,i"); + "ab\\\\", "d\\\\\\,", "g\\,i"); assertTextArray("Multiple labels with comma and escape surrounded by spaces", "@string/multiple_labels_with_comma_and_escape_surrounded_by_spaces", - " ab\\ ", " d\\, ", " g,i "); + " ab\\\\ ", " d\\\\\\, ", " g\\,i "); } public void testParseMultipleResources() { assertTextArray("Literals and resources", "1,@string/multiple_chars,z", "1", "a", "b", "c", "z"); + assertTextArray("Literals and resources and escape at end", + "\\1,@string/multiple_chars,z\\", "\\1", "a", "b", "c", "z\\"); assertTextArray("Multiple single resource chars and labels", "@string/single_char,@string/single_label,@string/escaped_comma", - "a", "abc", ","); + "a", "abc", "\\,"); + assertTextArray("Multiple single resource chars and labels 2", + "@string/single_char,@string/single_label,@string/escaped_comma_escape", + "a", "abc", "a\\,\\"); assertTextArray("Multiple multiple resource chars and labels", "@string/multiple_chars,@string/multiple_labels,@string/multiple_chars_with_comma", - "a", "b", "c", "abc", "def", "ghi", "a", ",", "c"); + "a", "b", "c", "abc", "def", "ghi", "a", "\\,", "c"); assertTextArray("Concatenated resources", "@string/multiple_chars@string/multiple_labels@string/multiple_chars_with_comma", - "a", "b", "cabc", "def", "ghia", ",", "c"); + "a", "b", "cabc", "def", "ghia", "\\,", "c"); assertTextArray("Concatenated resource and literal", "abc@string/multiple_labels", "abcabc", "def", "ghi");