From 3ceeb6bcc607b421fd6984c70664bcfd30bde1c2 Mon Sep 17 00:00:00 2001
From: "Tadashi G. Takaoka"
Date: Thu, 23 May 2013 02:07:55 -0700
Subject: [PATCH] Add CsvUtils that conform to RFC 4180

Bug: 9070153
Change-Id: I66e55dd2f30fddfeb849edb97bc1cbda6b126d92
---
 .../inputmethod/latin/utils/CsvUtils.java     | 319 +++++++++++++
 .../latin/utils/CsvUtilsTests.java            | 424 ++++++++++++++++++
 2 files changed, 743 insertions(+)
 create mode 100644 java/src/com/android/inputmethod/latin/utils/CsvUtils.java
 create mode 100644 tests/src/com/android/inputmethod/latin/utils/CsvUtilsTests.java

diff --git a/java/src/com/android/inputmethod/latin/utils/CsvUtils.java b/java/src/com/android/inputmethod/latin/utils/CsvUtils.java
new file mode 100644
index 000000000..999c2f0de
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/CsvUtils.java
@@ -0,0 +1,319 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.CollectionUtils;
+
+import java.util.ArrayList;
+
+/**
+ * Utility methods for parsing and serializing Comma-Separated Values. The public APIs of this
+ * utility class are {@link #split(String)}, {@link #split(int,String)}, {@link #join(String...)},
+ * {@link #join(int,String...)}, and {@link #join(int,int[],String...)}.
+ *
+ * This class implements CSV parsing and serializing methods conforming to RFC 4180 with an
+ * exception:
+ *  These methods can't handle new line code escaped in double quotes.
+ */
+@UsedForTesting
+public final class CsvUtils {
+    private CsvUtils() {
+        // This utility class is not publicly instantiable.
+    }
+
+    public static final int SPLIT_FLAGS_NONE = 0x0;
+    /**
+     * A flag for {@link #split(int,String)}. If this flag is specified, the method will trim
+     * spaces around fields before splitting. Note that this behavior doesn't conform to RFC 4180.
+     */
+    public static final int SPLIT_FLAGS_TRIM_SPACES = 0x1;
+
+    public static final int JOIN_FLAGS_NONE = 0x0;
+    /**
+     * A flag for {@link #join(int,String...)} and {@link #join(int,int[],String...)}. If this
+     * flag is specified, these methods surround each field with double quotes before joining.
+     */
+    public static final int JOIN_FLAGS_ALWAYS_QUOTED = 0x1;
+    /**
+     * A flag for {@link #join(int,String...)} and {@link #join(int,int[],String...)}. If this
+     * flag is specified, these methods add an extra space just after the comma separator. Note that
+     * this behavior doesn't conform to RFC 4180.
+     */
+    public static final int JOIN_FLAGS_EXTRA_SPACE = 0x2;
+
+    // Note that none of these characters match high or low surrogate characters, so we need not
+    // take care of matching by code point.
+    private static final char COMMA = ',';
+    private static final char SPACE = ' ';
+    private static final char QUOTE = '"';
+
+    @SuppressWarnings("serial")
+    public static class CsvParseException extends RuntimeException {
+        public CsvParseException(final String message) {
+            super(message);
+        }
+    }
+
+    /**
+     * Find the first non-space character in the text.
+     *
+     * @param text the text to be searched.
+     * @param fromIndex the index to start the search from, inclusive.
+     * @return the index of the first occurrence of the non-space character in the
+     *         text that is greater than or equal to fromIndex, or the length of
+     *         the text if the character does not occur.
+     */
+    private static int indexOfNonSpace(final String text, final int fromIndex) {
+        final int length = text.length();
+        if (fromIndex < 0 || fromIndex > length) {
+            throw new IllegalArgumentException("text=" + text + " fromIndex=" + fromIndex);
+        }
+        int index = fromIndex;
+        while (index < length && text.charAt(index) == SPACE) {
+            index++;
+        }
+        return index;
+    }
+
+    /**
+     * Find the last non-space character in the text.
+     *
+     * @param text the text to be searched.
+     * @param fromIndex the index to start the search from, exclusive.
+     * @param toIndex the index to end the search at, inclusive. Usually toIndex
+     *        points a non-space character.
+     * @return the index of the last occurrence of the non-space character in the
+     *         text, exclusive. It is never greater than fromIndex nor less than
+     *         toIndex; it equals toIndex if no non-space character occurs.
+     */
+    private static int lastIndexOfNonSpace(final String text, final int fromIndex,
+            final int toIndex) {
+        if (toIndex < 0 || fromIndex > text.length() || fromIndex < toIndex) {
+            throw new IllegalArgumentException(
+                    "text=" + text + " fromIndex=" + fromIndex + " toIndex=" + toIndex);
+        }
+        int index = fromIndex;
+        while (index > toIndex && text.charAt(index - 1) == SPACE) {
+            index--;
+        }
+        return index;
+    }
+
+    /**
+     * Find the index of a comma separator. The search takes account of quoted fields and escape
+     * quotes.
+     *
+     * @param text the text to be searched.
+     * @param fromIndex the index to start the search from, inclusive.
+     * @return the index of the comma separator, or the length of the text if there is none.
+     */
+    private static int indexOfSeparatorComma(final String text, final int fromIndex) {
+        final int length = text.length();
+        if (fromIndex < 0 || fromIndex > length) {
+            throw new IllegalArgumentException("text=" + text + " fromIndex=" + fromIndex);
+        }
+        final boolean isQuoted = (length - fromIndex > 0 && text.charAt(fromIndex) == QUOTE);
+        for (int index = fromIndex + (isQuoted ? 1 : 0); index < length; index++) {
+            final char c = text.charAt(index);
+            if (c == COMMA && !isQuoted) {
+                return index;
+            }
+            if (c == QUOTE) {
+                final int nextIndex = index + 1;
+                if (nextIndex < length && text.charAt(nextIndex) == QUOTE) {
+                    // Quoted quote.
+                    index = nextIndex;
+                    continue;
+                }
+                // Closing quote.
+                final int endIndex = text.indexOf(COMMA, nextIndex);
+                return endIndex < 0 ? length : endIndex;
+            }
+        }
+        return length;
+    }
+
+    /**
+     * Remove any enclosing QUOTEs (U+0022), and convert any two consecutive QUOTEs into
+     * one QUOTE. Throws {@link CsvParseException} if the quoting of the text is malformed.
+     *
+     * @param text the CSV field text that may have enclosing QUOTEs and escaped QUOTE character.
+     * @return the text with enclosing QUOTEs removed and each two consecutive QUOTEs converted
+     *         into one QUOTE.
+     */
+    @UsedForTesting
+    /* private */ static String unescapeField(final String text) {
+        StringBuilder sb = null; // Allocated lazily, only when an escaped quote is found.
+        final int length = text.length();
+        final boolean isQuoted = (length > 0 && text.charAt(0) == QUOTE);
+        int start = isQuoted ? 1 : 0;
+        int end = start;
+        while (start <= length && (end = text.indexOf(QUOTE, start)) >= start) {
+            final int nextIndex = end + 1;
+            if (nextIndex == length && isQuoted) {
+                // Closing quote.
+                break;
+            }
+            if (nextIndex < length && text.charAt(nextIndex) == QUOTE) {
+                if (!isQuoted) {
+                    throw new CsvParseException("Escaped quote in text");
+                }
+                // Quoted quote.
+                if (sb == null) {
+                    sb = new StringBuilder();
+                }
+                sb.append(text, start, nextIndex); // Keeps the first QUOTE of the pair only.
+                start = nextIndex + 1;
+            } else {
+                throw new CsvParseException(
+                        isQuoted ? "Raw quote in quoted text" : "Raw quote in text");
+            }
+        }
+        if (end < 0 && isQuoted) {
+            throw new CsvParseException("Unterminated quote");
+        }
+        if (end < 0) {
+            end = length;
+        }
+        if (sb != null && start < length) {
+            sb.append(text, start, end);
+        }
+        return sb == null ? text.substring(start, end) : sb.toString();
+    }
+
+    /**
+     * Split the CSV text into fields. The leading and trailing spaces of each field can be
+     * trimmed optionally.
+     *
+     * @param splitFlags flags for split behavior. {@link #SPLIT_FLAGS_TRIM_SPACES} will trim
+     *        spaces around each field.
+     * @param line the text of CSV fields.
+     * @return the array of unescaped CSV fields.
+     * @throws CsvParseException if the quoting of a field is malformed.
+     */
+    @UsedForTesting
+    public static String[] split(final int splitFlags, final String line) throws CsvParseException {
+        final boolean trimSpaces = (splitFlags & SPLIT_FLAGS_TRIM_SPACES) != 0;
+        final ArrayList<String> fields = CollectionUtils.newArrayList();
+        final int length = line.length();
+        int start = 0;
+        do {
+            final int csvStart = trimSpaces ? indexOfNonSpace(line, start) : start;
+            final int end = indexOfSeparatorComma(line, csvStart);
+            final int csvEnd = trimSpaces ? lastIndexOfNonSpace(line, end, csvStart) : end;
+            final String csvText = unescapeField(line.substring(csvStart, csvEnd));
+            fields.add(csvText);
+            start = end + 1;
+        } while (start <= length); // "<=" so that a trailing comma yields a last empty field.
+        return fields.toArray(new String[fields.size()]);
+    }
+
+    @UsedForTesting
+    public static String[] split(final String line) throws CsvParseException {
+        return split(SPLIT_FLAGS_NONE, line);
+    }
+
+    /**
+     * Convert the raw CSV field text to the escaped text. It adds enclosing QUOTEs (U+0022) if the
+     * raw value contains any QUOTE or comma. Also it converts any QUOTE character into two
+     * consecutive QUOTE characters.
+     *
+     * @param text the raw CSV field text to be escaped.
+     * @param alwaysQuoted true if the escaped text should always be enclosed by QUOTEs.
+     * @return the escaped text.
+     */
+    @UsedForTesting
+    /* private */ static String escapeField(final String text, final boolean alwaysQuoted) {
+        StringBuilder sb = null; // Allocated lazily, only when a QUOTE has to be doubled.
+        boolean needsQuoted = alwaysQuoted;
+        final int length = text.length();
+        int indexToBeAppended = 0;
+        for (int index = indexToBeAppended; index < length; index++) {
+            final char c = text.charAt(index);
+            if (c == COMMA) {
+                needsQuoted = true;
+            } else if (c == QUOTE) {
+                needsQuoted = true;
+                if (sb == null) {
+                    sb = new StringBuilder();
+                }
+                sb.append(text, indexToBeAppended, index);
+                indexToBeAppended = index + 1;
+                sb.append(QUOTE); // escaping quote.
+                sb.append(QUOTE); // escaped quote.
+            }
+        }
+        if (sb != null && indexToBeAppended < length) {
+            sb.append(text, indexToBeAppended, length);
+        }
+        final String escapedText = (sb == null) ? text : sb.toString();
+        return needsQuoted ? QUOTE + escapedText + QUOTE : escapedText;
+    }
+
+    private static final String SPACES = "                                ";
+
+    private static void padToColumn(final StringBuilder sb, final int column) {
+        int padding;
+        while ((padding = column - sb.length()) > 0) {
+            final String spaces = SPACES.substring(0, Math.min(padding, SPACES.length()));
+            sb.append(spaces);
+        }
+    }
+
+    /**
+     * Join CSV text fields with comma. The column positions of the fields can be specified
+     * optionally. Each field is escaped, and quoted when necessary, before joining.
+     *
+     * @param joinFlags flags for join behavior. {@link #JOIN_FLAGS_EXTRA_SPACE} will add an extra
+     *        space after each comma separator. {@link #JOIN_FLAGS_ALWAYS_QUOTED} will always add
+     *        surrounding quotes to each element.
+     * @param columnPositions the array of column positions of the fields. It can be shorter than
+     *        fields or null. Note that specifying the array column positions of the fields
+     *        doesn't conform to RFC 4180.
+     * @param fields the CSV text fields.
+     * @return the string of the joined and escaped fields.
+     */
+    @UsedForTesting
+    public static String join(final int joinFlags, final int[] columnPositions,
+            final String... fields) {
+        final boolean alwaysQuoted = (joinFlags & JOIN_FLAGS_ALWAYS_QUOTED) != 0;
+        final String separator = COMMA + ((joinFlags & JOIN_FLAGS_EXTRA_SPACE) != 0 ? " " : "");
+        final StringBuilder sb = new StringBuilder();
+        for (int index = 0; index < fields.length; index++) {
+            if (index > 0) {
+                sb.append(separator);
+            }
+            if (columnPositions != null && index < columnPositions.length) {
+                padToColumn(sb, columnPositions[index]);
+            }
+            final String escapedText = escapeField(fields[index], alwaysQuoted);
+            sb.append(escapedText);
+        }
+        return sb.toString();
+    }
+
+    @UsedForTesting
+    public static String join(final int joinFlags, final String... fields) {
+        return join(joinFlags, null, fields);
+    }
+
+    @UsedForTesting
+    public static String join(final String... fields) {
+        return join(JOIN_FLAGS_NONE, null, fields);
+    }
+}
diff --git a/tests/src/com/android/inputmethod/latin/utils/CsvUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/CsvUtilsTests.java
new file mode 100644
index 000000000..a0fa8fe4b
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/utils/CsvUtilsTests.java
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.SmallTest;
+
+import com.android.inputmethod.latin.utils.CsvUtils.CsvParseException;
+
+import java.util.Arrays;
+
+@SmallTest
+public class CsvUtilsTests extends AndroidTestCase {
+    public void testUnescape() {
+        assertEquals("", CsvUtils.unescapeField(""));
+        assertEquals("text", CsvUtils.unescapeField("text")); // text
+        assertEquals("", CsvUtils.unescapeField("\"\"")); // ""
+        assertEquals("\"", CsvUtils.unescapeField("\"\"\"\"")); // """" -> "
+        assertEquals("text", CsvUtils.unescapeField("\"text\"")); // "text" -> text
+        assertEquals("\"text", CsvUtils.unescapeField("\"\"\"text\"")); // """text" -> "text
+        assertEquals("text\"", CsvUtils.unescapeField("\"text\"\"\"")); // "text""" -> text"
+        assertEquals("te\"xt", CsvUtils.unescapeField("\"te\"\"xt\"")); // "te""xt" -> te"xt
+        assertEquals("\"text\"",
+                CsvUtils.unescapeField("\"\"\"text\"\"\"")); // """text""" -> "text"
+        assertEquals("t\"e\"x\"t",
+                CsvUtils.unescapeField("\"t\"\"e\"\"x\"\"t\"")); // "t""e""x""t" -> t"e"x"t
+    }
+
+    public void testUnescapeException() {
+        try {
+            final String text = CsvUtils.unescapeField("\""); // "
+            fail("Unterminated quote: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"\"\""); // """
+            fail("Unterminated quote: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"\"\"\"\""); // """""
+            fail("Unterminated quote: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"text"); // "text
+            fail("Unterminated quote: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("text\""); // text"
+            fail("Raw quote in text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("te\"xt"); // te"xt
+            fail("Raw quote in text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"\"text"); // ""text
+            fail("Raw quote in quoted text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in quoted text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("text\"\""); // text""
+            fail("Escaped quote in text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Escaped quote in text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("te\"\"xt"); // te""xt
+            fail("Escaped quote in text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Escaped quote in text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"\"text\""); // ""text"
+            fail("Raw quote in quoted text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in quoted text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"text\"\""); // "text""
+            fail("Unterminated quote: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"te\"xt\""); // "te"xt"
+            fail("Raw quote in quoted text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in quoted text", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\"b,c"); // "b,c
+            fail("Unterminated quote: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String text = CsvUtils.unescapeField("\",\"a\""); // ","a"
+            fail("Raw quote in quoted text: text=" + text);
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in quoted text", success.getMessage());
+        }
+    }
+
+    private static <T> void assertArrayEquals(final T[] expected, final T[] actual) {
+        if (expected == actual) {
+            return;
+        }
+        if (expected == null || actual == null) {
+            assertEquals(Arrays.toString(expected), Arrays.toString(actual));
+            return;
+        }
+        if (expected.length != actual.length) {
+            assertEquals("[length]", Arrays.toString(expected), Arrays.toString(actual));
+            return;
+        }
+        for (int i = 0; i < expected.length; i++) {
+            final T e = expected[i];
+            final T a = actual[i];
+            if (e == a) {
+                continue;
+            }
+            assertEquals("[" + i + "]", e, a);
+        }
+    }
+
+    public void testSplit() {
+        assertArrayEquals(new String[]{""}, CsvUtils.split(""));
+        assertArrayEquals(new String[]{" "}, CsvUtils.split(" "));
+        assertArrayEquals(new String[]{"text"}, CsvUtils.split("text"));
+        assertArrayEquals(new String[]{" a b "}, CsvUtils.split(" a b "));
+
+        assertArrayEquals(new String[]{"", ""}, CsvUtils.split(","));
+        assertArrayEquals(new String[]{"", "", ""}, CsvUtils.split(",,"));
+        assertArrayEquals(new String[]{" ", " "}, CsvUtils.split(" , "));
+        assertArrayEquals(new String[]{" ", " ", " "}, CsvUtils.split(" , , "));
+        assertArrayEquals(new String[]{"a", "b"}, CsvUtils.split("a,b"));
+        assertArrayEquals(new String[]{" a ", " b "}, CsvUtils.split(" a , b "));
+
+        assertArrayEquals(new String[]{"text"},
+                CsvUtils.split("\"text\"")); // "text"
+        assertArrayEquals(new String[]{" text "},
+                CsvUtils.split("\" text \"")); // "_text_"
+
+        assertArrayEquals(new String[]{""},
+                CsvUtils.split("\"\"")); // ""
+        assertArrayEquals(new String[]{"\""},
+                CsvUtils.split("\"\"\"\"")); // """"
+        assertArrayEquals(new String[]{"", ""},
+                CsvUtils.split("\"\",\"\"")); // "",""
+        assertArrayEquals(new String[]{"\",\""},
+                CsvUtils.split("\"\"\",\"\"\"")); // ""","""
+        assertArrayEquals(new String[]{"\"", "\""},
+                CsvUtils.split("\"\"\"\",\"\"\"\"")); // """",""""
+        assertArrayEquals(new String[]{"\"", "\",\""},
+                CsvUtils.split("\"\"\"\",\"\"\",\"\"\"")); // """",""","""
+        assertArrayEquals(new String[]{"\",\"", "\""},
+                CsvUtils.split("\"\"\",\"\"\",\"\"\"\"")); // """,""",""""
+
+        assertArrayEquals(new String[]{" a ", " b , c "},
+                CsvUtils.split(" a ,\" b , c \"")); // _a_,"_b_,_c_"
+        assertArrayEquals(new String[]{" a ", " b , c ", " d "},
+                CsvUtils.split(" a ,\" b , c \", d ")); // _a_,"_b_,_c_",_d_
+    }
+
+    public void testSplitException() {
+        try {
+            final String[] fields = CsvUtils.split(" \"text\" "); // _"text"_
+            fail("Raw quote in text: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in text", success.getMessage());
+        }
+        try {
+            final String[] fields = CsvUtils.split(" \" text \" "); // _"_text_"_
+            fail("Raw quote in text: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in text", success.getMessage());
+        }
+
+        try {
+            final String[] fields = CsvUtils.split("a,\"b,"); // a,"b,
+            fail("Unterminated quote: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String[] fields = CsvUtils.split("a,\"\"\",b"); // a,""",b
+            fail("Unterminated quote: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String[] fields = CsvUtils.split("a,\"\"\"\"\",b"); // a,""""",b
+            fail("Unterminated quote: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String[] fields = CsvUtils.split("a,\"b,c"); // a,"b,c
+            fail("Unterminated quote: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Unterminated quote", success.getMessage());
+        }
+        try {
+            final String[] fields = CsvUtils.split("a,\",\"b,c"); // a,","b,c
+            fail("Raw quote in quoted text: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in quoted text", success.getMessage());
+        }
+        try {
+            final String[] fields = CsvUtils.split("a,\",\"b\",\",c"); // a,","b",",c
+            fail("Raw quote in quoted text: fields=" + Arrays.toString(fields));
+        } catch (final CsvParseException success) {
+            assertEquals("Raw quote in quoted text", success.getMessage());
+        }
+    }
+
+    public void testSplitWithTrimSpaces() {
+        final int trimSpaces = CsvUtils.SPLIT_FLAGS_TRIM_SPACES;
+        assertArrayEquals(new String[]{""}, CsvUtils.split(trimSpaces, ""));
+        assertArrayEquals(new String[]{""}, CsvUtils.split(trimSpaces, " "));
+        assertArrayEquals(new String[]{"text"}, CsvUtils.split(trimSpaces, "text"));
+        assertArrayEquals(new String[]{"a b"}, CsvUtils.split(trimSpaces, " a b "));
+
+        assertArrayEquals(new String[]{"", ""}, CsvUtils.split(trimSpaces, ","));
+        assertArrayEquals(new String[]{"", "", ""}, CsvUtils.split(trimSpaces, ",,"));
+        assertArrayEquals(new String[]{"", ""}, CsvUtils.split(trimSpaces, " , "));
+        assertArrayEquals(new String[]{"", "", ""}, CsvUtils.split(trimSpaces, " , , "));
+        assertArrayEquals(new String[]{"a", "b"}, CsvUtils.split(trimSpaces, "a,b"));
+        assertArrayEquals(new String[]{"a", "b"}, CsvUtils.split(trimSpaces, " a , b "));
+
+        assertArrayEquals(new String[]{"text"},
+                CsvUtils.split(trimSpaces, "\"text\"")); // "text"
+        assertArrayEquals(new String[]{"text"},
+                CsvUtils.split(trimSpaces, " \"text\" ")); // _"text"_
+        assertArrayEquals(new String[]{" text "},
+                CsvUtils.split(trimSpaces, "\" text \"")); // "_text_"
+        assertArrayEquals(new String[]{" text "},
+                CsvUtils.split(trimSpaces, " \" text \" ")); // _"_text_"_
+        assertArrayEquals(new String[]{"a", "b"},
+                CsvUtils.split(trimSpaces, " \"a\" , \"b\" ")); // _"a"_,_"b"_
+
+        assertArrayEquals(new String[]{""},
+                CsvUtils.split(trimSpaces, " \"\" ")); // _""_
+        assertArrayEquals(new String[]{"\""},
+                CsvUtils.split(trimSpaces, " \"\"\"\" ")); // _""""_
+        assertArrayEquals(new String[]{"", ""},
+                CsvUtils.split(trimSpaces, " \"\" , \"\" ")); // _""_,_""_
+        assertArrayEquals(new String[]{"\" , \""},
+                CsvUtils.split(trimSpaces, " \"\"\" , \"\"\" ")); // _"""_,_"""_
+        assertArrayEquals(new String[]{"\"", "\""},
+                CsvUtils.split(trimSpaces, " \"\"\"\" , \"\"\"\" ")); // _""""_,_""""_
+        assertArrayEquals(new String[]{"\"", "\" , \""},
+                CsvUtils.split(trimSpaces, " \"\"\"\" , \"\"\" , \"\"\" ")); // _""""_,_"""_,_"""_
+        assertArrayEquals(new String[]{"\" , \"", "\""},
+                CsvUtils.split(trimSpaces, " \"\"\" , \"\"\" , \"\"\"\" ")); // _"""_,_"""_,_""""_
+
+        assertArrayEquals(new String[]{"a", " b , c "},
+                CsvUtils.split(trimSpaces, " a , \" b , c \" ")); // _a_,_"_b_,_c_"_
+        assertArrayEquals(new String[]{"a", " b , c ", "d"},
+                CsvUtils.split(trimSpaces, " a, \" b , c \" , d ")); // _a,_"_b_,_c_"_,_d_
+    }
+
+    public void testEscape() {
+        assertEquals("", CsvUtils.escapeField("", false));
+        assertEquals("plain", CsvUtils.escapeField("plain", false));
+        assertEquals(" ", CsvUtils.escapeField(" ", false));
+        assertEquals("   ", CsvUtils.escapeField("   ", false));
+        assertEquals("a space", CsvUtils.escapeField("a space", false));
+        assertEquals(" space-at-start", CsvUtils.escapeField(" space-at-start", false));
+        assertEquals("space-at-end ", CsvUtils.escapeField("space-at-end ", false));
+        assertEquals("a lot of spaces", CsvUtils.escapeField("a lot of spaces", false));
+        assertEquals("\",\"", CsvUtils.escapeField(",", false));
+        assertEquals("\",,\"", CsvUtils.escapeField(",,", false));
+        assertEquals("\"a,comma\"", CsvUtils.escapeField("a,comma", false));
+        assertEquals("\",comma-at-begin\"", CsvUtils.escapeField(",comma-at-begin", false));
+        assertEquals("\"comma-at-end,\"", CsvUtils.escapeField("comma-at-end,", false));
+        assertEquals("\",,a,lot,,,of,commas,,\"",
+                CsvUtils.escapeField(",,a,lot,,,of,commas,,", false));
+        assertEquals("\"a comma,and a space\"", CsvUtils.escapeField("a comma,and a space", false));
+        assertEquals("\"\"\"\"", CsvUtils.escapeField("\"", false)); // " -> """"
+        assertEquals("\"\"\"\"\"\"", CsvUtils.escapeField("\"\"", false)); // "" -> """"""
+        assertEquals("\"\"\"\"\"\"\"\"", CsvUtils.escapeField("\"\"\"", false)); // """ -> """"""""
+        assertEquals("\"\"\"text\"\"\"",
+                CsvUtils.escapeField("\"text\"", false)); // "text" -> """text"""
+        assertEquals("\"text has \"\" in middle\"",
+                CsvUtils.escapeField("text has \" in middle", false));
+        assertEquals("\"\"\"quote,at begin\"", CsvUtils.escapeField("\"quote,at begin", false));
+        assertEquals("\"quote at,end\"\"\"", CsvUtils.escapeField("quote at,end\"", false));
+        assertEquals("\"\"\"quote at begin\"", CsvUtils.escapeField("\"quote at begin", false));
+        assertEquals("\"quote at end\"\"\"", CsvUtils.escapeField("quote at end\"", false));
+    }
+
+    public void testEscapeWithAlwaysQuoted() {
+        assertEquals("\"\"", CsvUtils.escapeField("", true));
+        assertEquals("\"plain\"", CsvUtils.escapeField("plain", true));
+        assertEquals("\" \"", CsvUtils.escapeField(" ", true));
+        assertEquals("\"   \"", CsvUtils.escapeField("   ", true));
+        assertEquals("\"a space\"", CsvUtils.escapeField("a space", true));
+        assertEquals("\" space-at-start\"", CsvUtils.escapeField(" space-at-start", true));
+        assertEquals("\"space-at-end \"", CsvUtils.escapeField("space-at-end ", true));
+        assertEquals("\"a lot of spaces\"", CsvUtils.escapeField("a lot of spaces", true));
+        assertEquals("\",\"", CsvUtils.escapeField(",", true));
+        assertEquals("\",,\"", CsvUtils.escapeField(",,", true));
+        assertEquals("\"a,comma\"", CsvUtils.escapeField("a,comma", true));
+        assertEquals("\",comma-at-begin\"", CsvUtils.escapeField(",comma-at-begin", true));
+        assertEquals("\"comma-at-end,\"", CsvUtils.escapeField("comma-at-end,", true));
+        assertEquals("\",,a,lot,,,of,commas,,\"",
+                CsvUtils.escapeField(",,a,lot,,,of,commas,,", true));
+        assertEquals("\"a comma,and a space\"", CsvUtils.escapeField("a comma,and a space", true));
+        assertEquals("\"\"\"\"", CsvUtils.escapeField("\"", true)); // " -> """"
+        assertEquals("\"\"\"\"\"\"", CsvUtils.escapeField("\"\"", true)); // "" -> """"""
+        assertEquals("\"\"\"\"\"\"\"\"", CsvUtils.escapeField("\"\"\"", true)); // """ -> """"""""
+        assertEquals("\"\"\"text\"\"\"",
+                CsvUtils.escapeField("\"text\"", true)); // "text" -> """text"""
+        assertEquals("\"text has \"\" in middle\"",
+                CsvUtils.escapeField("text has \" in middle", true));
+        assertEquals("\"\"\"quote,at begin\"", CsvUtils.escapeField("\"quote,at begin", true));
+        assertEquals("\"quote at,end\"\"\"", CsvUtils.escapeField("quote at,end\"", true));
+        assertEquals("\"\"\"quote at begin\"", CsvUtils.escapeField("\"quote at begin", true));
+        assertEquals("\"quote at end\"\"\"", CsvUtils.escapeField("quote at end\"", true));
+    }
+
+    public void testJoinWithoutColumnPositions() {
+        assertEquals("", CsvUtils.join());
+        assertEquals("", CsvUtils.join(""));
+        assertEquals(",", CsvUtils.join("", ""));
+
+        assertEquals("text, text,text ",
+                CsvUtils.join("text", " text", "text "));
+        assertEquals("\"\"\"\",\"\"\"\"\"\",\"\"\"text\"\"\"",
+                CsvUtils.join("\"", "\"\"", "\"text\""));
+        assertEquals("a b,\"c,d\",\"e\"\"f\"",
+                CsvUtils.join("a b", "c,d", "e\"f"));
+    }
+
+    public void testJoinWithoutColumnPositionsWithExtraSpace() {
+        final int extraSpace = CsvUtils.JOIN_FLAGS_EXTRA_SPACE;
+        assertEquals("", CsvUtils.join(extraSpace));
+        assertEquals("", CsvUtils.join(extraSpace, ""));
+        assertEquals(", ", CsvUtils.join(extraSpace, "", ""));
+
+        assertEquals("text,  text, text ",
+                CsvUtils.join(extraSpace, "text", " text", "text "));
+        // ","","text" -> """", """""", """text"""
+        assertEquals("\"\"\"\", \"\"\"\"\"\", \"\"\"text\"\"\"",
+                CsvUtils.join(extraSpace, "\"", "\"\"", "\"text\""));
+        assertEquals("a b, \"c,d\", \"e\"\"f\"",
+                CsvUtils.join(extraSpace, "a b", "c,d", "e\"f"));
+    }
+
+    public void testJoinWithoutColumnPositionsWithExtraSpaceAndAlwaysQuoted() {
+        final int extraSpaceAndQuoted =
+                CsvUtils.JOIN_FLAGS_EXTRA_SPACE | CsvUtils.JOIN_FLAGS_ALWAYS_QUOTED;
+        assertEquals("", CsvUtils.join(extraSpaceAndQuoted));
+        assertEquals("\"\"", CsvUtils.join(extraSpaceAndQuoted, ""));
+        assertEquals("\"\", \"\"", CsvUtils.join(extraSpaceAndQuoted, "", ""));
+
+        assertEquals("\"text\", \" text\", \"text \"",
+                CsvUtils.join(extraSpaceAndQuoted, "text", " text", "text "));
+        // ","","text" -> """", """""", """text"""
+        assertEquals("\"\"\"\", \"\"\"\"\"\", \"\"\"text\"\"\"",
+                CsvUtils.join(extraSpaceAndQuoted, "\"", "\"\"", "\"text\""));
+        assertEquals("\"a b\", \"c,d\", \"e\"\"f\"",
+                CsvUtils.join(extraSpaceAndQuoted, "a b", "c,d", "e\"f"));
+    }
+
+    public void testJoinWithColumnPositions() {
+        final int noFlags = CsvUtils.JOIN_FLAGS_NONE;
+        assertEquals("", CsvUtils.join(noFlags, new int[]{}));
+        assertEquals("   ", CsvUtils.join(noFlags, new int[]{3}, "")); // padded to column 3
+        assertEquals(" ,", CsvUtils.join(noFlags, new int[]{1}, "", ""));
+        assertEquals(",  ", CsvUtils.join(noFlags, new int[]{0, 3}, "", "")); // padded to column 3
+
+        assertEquals("text,    text, text ",
+                CsvUtils.join(noFlags, new int[]{0, 8, 15}, "text", " text", "text "));
+        // ","","text" -> """",   """""","""text"""
+        assertEquals("\"\"\"\",   \"\"\"\"\"\",\"\"\"text\"\"\"",
+                CsvUtils.join(noFlags, new int[]{0, 8, 15}, "\"", "\"\"", "\"text\""));
+        assertEquals("a b,    \"c,d\", \"e\"\"f\"",
+                CsvUtils.join(noFlags, new int[]{0, 8, 15}, "a b", "c,d", "e\"f"));
+    }
+
+    public void testJoinWithColumnPositionsWithExtraSpace() {
+        final int extraSpace = CsvUtils.JOIN_FLAGS_EXTRA_SPACE;
+        assertEquals("", CsvUtils.join(extraSpace, new int[]{}));
+        assertEquals("   ", CsvUtils.join(extraSpace, new int[]{3}, "")); // padded to column 3
+        assertEquals(" , ", CsvUtils.join(extraSpace, new int[]{1}, "", ""));
+        assertEquals(",  ", CsvUtils.join(extraSpace, new int[]{0, 3}, "", "")); // padded to column 3
+
+        assertEquals("text,    text, text ",
+                CsvUtils.join(extraSpace, new int[]{0, 8, 15}, "text", " text", "text "));
+        // ","","text" -> """",   """""", """text"""
+        assertEquals("\"\"\"\",   \"\"\"\"\"\", \"\"\"text\"\"\"",
+                CsvUtils.join(extraSpace, new int[]{0, 8, 15}, "\"", "\"\"", "\"text\""));
+        assertEquals("a b,    \"c,d\", \"e\"\"f\"",
+                CsvUtils.join(extraSpace, new int[]{0, 8, 15}, "a b", "c,d", "e\"f"));
+    }
+}