Merge "Add CsvUtils that conform to RFC 4180"
commit
9d4e251826
|
@ -0,0 +1,319 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.utils;
|
||||||
|
|
||||||
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.CollectionUtils;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility methods for parsing and serializing Comma-Separated Values. The public APIs of this
|
||||||
|
* utility class are {@link #split(String)}, {@link #split(int,String)}, {@link #join(String)},
|
||||||
|
* {@link #join(int,String...)}, and {@link #join(int,int[],String...)}.
|
||||||
|
*
|
||||||
|
* This class implements CSV parsing and serializing methods conforming to RFC 4180 with an
|
||||||
|
* exception:
|
||||||
|
* These methods can't handle new line code escaped in double quotes.
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
public final class CsvUtils {
|
||||||
|
private CsvUtils() {
|
||||||
|
// This utility class is not publicly instantiable.
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final int SPLIT_FLAGS_NONE = 0x0;
|
||||||
|
/**
|
||||||
|
* A flag for {@link #split(int,String)}. If this flag is specified, the method will trim
|
||||||
|
* spaces around fields before splitting. Note that this behavior doesn't conform to RFC 4180.
|
||||||
|
*/
|
||||||
|
public static final int SPLIT_FLAGS_TRIM_SPACES = 0x1;
|
||||||
|
|
||||||
|
public static final int JOIN_FLAGS_NONE = 0x0;
|
||||||
|
/**
|
||||||
|
* A flag for {@link #join(int,String...)} and {@link #join(int,int[],String...)}. If this
|
||||||
|
* flag is specified, these methods surround each field with double quotes before joining.
|
||||||
|
*/
|
||||||
|
public static final int JOIN_FLAGS_ALWAYS_QUOTED = 0x1;
|
||||||
|
/**
|
||||||
|
* A flag for {@link #join(int,String...)} and {@link #join(int,int[],String...)}. If this
|
||||||
|
* flag is specified, these methods add an extra space just after the comma separator. Note that
|
||||||
|
* this behavior doesn't conform to RFC 4180.
|
||||||
|
*/
|
||||||
|
public static final int JOIN_FLAGS_EXTRA_SPACE = 0x2;
|
||||||
|
|
||||||
|
// Note that none of these characters match high or low surrogate characters, so we need not
|
||||||
|
// take care of matching by code point.
|
||||||
|
private static final char COMMA = ',';
|
||||||
|
private static final char SPACE = ' ';
|
||||||
|
private static final char QUOTE = '"';
|
||||||
|
|
||||||
|
@SuppressWarnings("serial")
|
||||||
|
public static class CsvParseException extends RuntimeException {
|
||||||
|
public CsvParseException(final String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the first non-space character in the text.
|
||||||
|
*
|
||||||
|
* @param text the text to be searched.
|
||||||
|
* @param fromIndex the index to start the search from, inclusive.
|
||||||
|
* @return the index of the first occurrence of the non-space character in the
|
||||||
|
* <code>text</code> that is greater than or equal to <code>fromIndex</code>, or the length of
|
||||||
|
* the <code>text</code> if the character does not occur.
|
||||||
|
*/
|
||||||
|
private static int indexOfNonSpace(final String text, final int fromIndex) {
|
||||||
|
final int length = text.length();
|
||||||
|
if (fromIndex < 0 || fromIndex > length) {
|
||||||
|
throw new IllegalArgumentException("text=" + text + " fromIndex=" + fromIndex);
|
||||||
|
}
|
||||||
|
int index = fromIndex;
|
||||||
|
while (index < length && text.charAt(index) == SPACE) {
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the last non-space character in the text.
|
||||||
|
*
|
||||||
|
* @param text the text to be searched.
|
||||||
|
* @param fromIndex the index to start the search from, exclusive.
|
||||||
|
* @param toIndex the index to end the search at, inclusive. Usually <code>toIndex</code>
|
||||||
|
* points a non-space character.
|
||||||
|
* @return the index of the last occurrence of the non-space character in the
|
||||||
|
* <code>text</code>, exclusive. It is less than <code>fromIndex</code> and greater than
|
||||||
|
* <code>toIndex</code>, or <code>toIndex</code> if the character does not occur.
|
||||||
|
*/
|
||||||
|
private static int lastIndexOfNonSpace(final String text, final int fromIndex,
|
||||||
|
final int toIndex) {
|
||||||
|
if (toIndex < 0 || fromIndex > text.length() || fromIndex < toIndex) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"text=" + text + " fromIndex=" + fromIndex + " toIndex=" + toIndex);
|
||||||
|
}
|
||||||
|
int index = fromIndex;
|
||||||
|
while (index > toIndex && text.charAt(index - 1) == SPACE) {
|
||||||
|
index--;
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the index of a comma separator. The search takes account of quoted fields and escape
|
||||||
|
* quotes.
|
||||||
|
*
|
||||||
|
* @param text the text to be searched.
|
||||||
|
* @param fromIndex the index to start the search from, inclusive.
|
||||||
|
* @return the index of the comma separator, exclusive.
|
||||||
|
*/
|
||||||
|
private static int indexOfSeparatorComma(final String text, final int fromIndex) {
|
||||||
|
final int length = text.length();
|
||||||
|
if (fromIndex < 0 || fromIndex > length) {
|
||||||
|
throw new IllegalArgumentException("text=" + text + " fromIndex=" + fromIndex);
|
||||||
|
}
|
||||||
|
final boolean isQuoted = (length - fromIndex > 0 && text.charAt(fromIndex) == QUOTE);
|
||||||
|
for (int index = fromIndex + (isQuoted ? 1 : 0); index < length; index++) {
|
||||||
|
final char c = text.charAt(index);
|
||||||
|
if (c == COMMA && !isQuoted) {
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
if (c == QUOTE) {
|
||||||
|
final int nextIndex = index + 1;
|
||||||
|
if (nextIndex < length && text.charAt(nextIndex) == QUOTE) {
|
||||||
|
// Quoted quote.
|
||||||
|
index = nextIndex;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Closing quote.
|
||||||
|
final int endIndex = text.indexOf(COMMA, nextIndex);
|
||||||
|
return endIndex < 0 ? length : endIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removing any enclosing QUOTEs (U+0022), and convert any two consecutive QUOTEs into
|
||||||
|
* one QUOTE.
|
||||||
|
*
|
||||||
|
* @param text the CSV field text that may have enclosing QUOTEs and escaped QUOTE character.
|
||||||
|
* @return the text that has been removed enclosing quotes and converted two consecutive QUOTEs
|
||||||
|
* into one QUOTE.
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
/* private */ static String unescapeField(final String text) {
|
||||||
|
StringBuilder sb = null;
|
||||||
|
final int length = text.length();
|
||||||
|
final boolean isQuoted = (length > 0 && text.charAt(0) == QUOTE);
|
||||||
|
int start = isQuoted ? 1 : 0;
|
||||||
|
int end = start;
|
||||||
|
while (start <= length && (end = text.indexOf(QUOTE, start)) >= start) {
|
||||||
|
final int nextIndex = end + 1;
|
||||||
|
if (nextIndex == length && isQuoted) {
|
||||||
|
// Closing quote.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (nextIndex < length && text.charAt(nextIndex) == QUOTE) {
|
||||||
|
if (!isQuoted) {
|
||||||
|
throw new CsvParseException("Escaped quote in text");
|
||||||
|
}
|
||||||
|
// Quoted quote.
|
||||||
|
if (sb == null) {
|
||||||
|
sb = new StringBuilder();
|
||||||
|
}
|
||||||
|
sb.append(text.substring(start, nextIndex));
|
||||||
|
start = nextIndex + 1;
|
||||||
|
} else {
|
||||||
|
throw new CsvParseException(
|
||||||
|
isQuoted ? "Raw quote in quoted text" : "Raw quote in text");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (end < 0 && isQuoted) {
|
||||||
|
throw new CsvParseException("Unterminated quote");
|
||||||
|
}
|
||||||
|
if (end < 0) {
|
||||||
|
end = length;
|
||||||
|
}
|
||||||
|
if (sb != null && start < length) {
|
||||||
|
sb.append(text.substring(start, end));
|
||||||
|
}
|
||||||
|
return sb == null ? text.substring(start, end) : sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split the CSV text into fields. The leading and trailing spaces of the each field can be
|
||||||
|
* trimmed optionally.
|
||||||
|
*
|
||||||
|
* @param splitFlags flags for split behavior. {@link #SPLIT_FLAGS_TRIM_SPACES} will trim
|
||||||
|
* spaces around each fields.
|
||||||
|
* @param line the text of CSV fields.
|
||||||
|
* @return the array of unescaped CVS fields.
|
||||||
|
* @throws CsvParseException
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
public static String[] split(final int splitFlags, final String line) throws CsvParseException {
|
||||||
|
final boolean trimSpaces = (splitFlags & SPLIT_FLAGS_TRIM_SPACES) != 0;
|
||||||
|
final ArrayList<String> fields = CollectionUtils.newArrayList();
|
||||||
|
final int length = line.length();
|
||||||
|
int start = 0;
|
||||||
|
do {
|
||||||
|
final int csvStart = trimSpaces ? indexOfNonSpace(line, start) : start;
|
||||||
|
final int end = indexOfSeparatorComma(line, csvStart);
|
||||||
|
final int csvEnd = trimSpaces ? lastIndexOfNonSpace(line, end, csvStart) : end;
|
||||||
|
final String csvText = unescapeField(line.substring(csvStart, csvEnd));
|
||||||
|
fields.add(csvText);
|
||||||
|
start = end + 1;
|
||||||
|
} while (start <= length);
|
||||||
|
return fields.toArray(new String[fields.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public static String[] split(final String line) throws CsvParseException {
|
||||||
|
return split(SPLIT_FLAGS_NONE, line);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the raw CSV field text to the escaped text. It adds enclosing QUOTEs (U+0022) if the
|
||||||
|
* raw value contains any QUOTE or comma. Also it converts any QUOTE character into two
|
||||||
|
* consecutive QUOTE characters.
|
||||||
|
*
|
||||||
|
* @param text the raw CSV field text to be escaped.
|
||||||
|
* @param alwaysQuoted true if the escaped text should always be enclosed by QUOTEs.
|
||||||
|
* @return the escaped text.
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
/* private */ static String escapeField(final String text, final boolean alwaysQuoted) {
|
||||||
|
StringBuilder sb = null;
|
||||||
|
boolean needsQuoted = alwaysQuoted;
|
||||||
|
final int length = text.length();
|
||||||
|
int indexToBeAppended = 0;
|
||||||
|
for (int index = indexToBeAppended; index < length; index++) {
|
||||||
|
final char c = text.charAt(index);
|
||||||
|
if (c == COMMA) {
|
||||||
|
needsQuoted = true;
|
||||||
|
} else if (c == QUOTE) {
|
||||||
|
needsQuoted = true;
|
||||||
|
if (sb == null) {
|
||||||
|
sb = new StringBuilder();
|
||||||
|
}
|
||||||
|
sb.append(text.substring(indexToBeAppended, index));
|
||||||
|
indexToBeAppended = index + 1;
|
||||||
|
sb.append(QUOTE); // escaping quote.
|
||||||
|
sb.append(QUOTE); // escaped quote.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sb != null && indexToBeAppended < length) {
|
||||||
|
sb.append(text.substring(indexToBeAppended));
|
||||||
|
}
|
||||||
|
final String escapedText = (sb == null) ? text : sb.toString();
|
||||||
|
return needsQuoted ? QUOTE + escapedText + QUOTE : escapedText;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final String SPACES = " ";
|
||||||
|
|
||||||
|
private static void padToColumn(final StringBuilder sb, final int column) {
|
||||||
|
int padding;
|
||||||
|
while ((padding = column - sb.length()) > 0) {
|
||||||
|
final String spaces = SPACES.substring(0, Math.min(padding, SPACES.length()));
|
||||||
|
sb.append(spaces);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Join CSV text fields with comma. The column positions of the fields can be specified
|
||||||
|
* optionally. Surround each fields with double quotes before joining.
|
||||||
|
*
|
||||||
|
* @param joinFlags flags for join behavior. {@link #JOIN_FLAGS_EXTRA_SPACE} will add an extra
|
||||||
|
* space after each comma separator. {@link #JOIN_FLAGS_ALWAYS_QUOTED} will always add
|
||||||
|
* surrounding quotes to each element.
|
||||||
|
* @param columnPositions the array of column positions of the fields. It can be shorter than
|
||||||
|
* <code>fields</code> or null. Note that specifying the array column positions of the fields
|
||||||
|
* doesn't conform to RFC 4180.
|
||||||
|
* @param fields the CSV text fields.
|
||||||
|
* @return the string of the joined and escaped <code>fields</code>.
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
public static String join(final int joinFlags, final int columnPositions[],
|
||||||
|
final String... fields) {
|
||||||
|
final boolean alwaysQuoted = (joinFlags & JOIN_FLAGS_ALWAYS_QUOTED) != 0;
|
||||||
|
final String separator = COMMA + ((joinFlags & JOIN_FLAGS_EXTRA_SPACE) != 0 ? " " : "");
|
||||||
|
final StringBuilder sb = new StringBuilder();
|
||||||
|
for (int index = 0; index < fields.length; index++) {
|
||||||
|
if (index > 0) {
|
||||||
|
sb.append(separator);
|
||||||
|
}
|
||||||
|
if (columnPositions != null && index < columnPositions.length) {
|
||||||
|
padToColumn(sb, columnPositions[index]);
|
||||||
|
}
|
||||||
|
final String escapedText = escapeField(fields[index], alwaysQuoted);
|
||||||
|
sb.append(escapedText);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public static String join(final int joinFlags, final String... fields) {
|
||||||
|
return join(joinFlags, null, fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public static String join(final String... fields) {
|
||||||
|
return join(JOIN_FLAGS_NONE, null, fields);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,424 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.utils;
|
||||||
|
|
||||||
|
import android.test.AndroidTestCase;
|
||||||
|
import android.test.suitebuilder.annotation.SmallTest;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.utils.CsvUtils.CsvParseException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
@SmallTest
|
||||||
|
public class CsvUtilsTests extends AndroidTestCase {
|
||||||
|
public void testUnescape() {
|
||||||
|
assertEquals("", CsvUtils.unescapeField(""));
|
||||||
|
assertEquals("text", CsvUtils.unescapeField("text")); // text
|
||||||
|
assertEquals("", CsvUtils.unescapeField("\"\"")); // ""
|
||||||
|
assertEquals("\"", CsvUtils.unescapeField("\"\"\"\"")); // """" -> "
|
||||||
|
assertEquals("text", CsvUtils.unescapeField("\"text\"")); // "text" -> text
|
||||||
|
assertEquals("\"text", CsvUtils.unescapeField("\"\"\"text\"")); // """text" -> "text
|
||||||
|
assertEquals("text\"", CsvUtils.unescapeField("\"text\"\"\"")); // "text""" -> text"
|
||||||
|
assertEquals("te\"xt", CsvUtils.unescapeField("\"te\"\"xt\"")); // "te""xt" -> te"xt
|
||||||
|
assertEquals("\"text\"",
|
||||||
|
CsvUtils.unescapeField("\"\"\"text\"\"\"")); // """text""" -> "text"
|
||||||
|
assertEquals("t\"e\"x\"t",
|
||||||
|
CsvUtils.unescapeField("\"t\"\"e\"\"x\"\"t\"")); // "t""e""x""t" -> t"e"x"t
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnescapeException() {
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\""); // "
|
||||||
|
fail("Unterminated quote: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"\"\""); // """
|
||||||
|
fail("Unterminated quote: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"\"\"\"\""); // """""
|
||||||
|
fail("Unterminated quote: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"text"); // "text
|
||||||
|
fail("Unterminated quote: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("text\""); // text"
|
||||||
|
fail("Raw quote in text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("te\"xt"); // te"xt
|
||||||
|
fail("Raw quote in text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"\"text"); // ""text
|
||||||
|
fail("Raw quote in quoted text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in quoted text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("text\"\""); // text""
|
||||||
|
fail("Escaped quote in text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Escaped quote in text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("te\"\"xt"); // te""xt
|
||||||
|
fail("Escaped quote in text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Escaped quote in text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"\"text\""); // ""text"
|
||||||
|
fail("Raw quote in quoted text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in quoted text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"text\"\""); // "text""
|
||||||
|
fail("Unterminated quote: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"te\"xt\""); // "te"xt"
|
||||||
|
fail("Raw quote in quoted text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in quoted text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\"b,c"); // "b,c
|
||||||
|
fail("Unterminated quote: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String text = CsvUtils.unescapeField("\",\"a\""); // ","a"
|
||||||
|
fail("Raw quote in quoted text: text=" + text);
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in quoted text", success.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> void assertArrayEquals(final T[] expected, final T[] actual) {
|
||||||
|
if (expected == actual) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (expected == null || actual == null) {
|
||||||
|
assertEquals(Arrays.toString(expected), Arrays.toString(actual));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (expected.length != actual.length) {
|
||||||
|
assertEquals("[length]", Arrays.toString(expected), Arrays.toString(actual));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < expected.length; i++) {
|
||||||
|
final T e = expected[i];
|
||||||
|
final T a = actual[i];
|
||||||
|
if (e == a) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assertEquals("["+i+"]", expected[i], actual[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSplit() {
|
||||||
|
assertArrayEquals(new String[]{""}, CsvUtils.split(""));
|
||||||
|
assertArrayEquals(new String[]{" "}, CsvUtils.split(" "));
|
||||||
|
assertArrayEquals(new String[]{"text"}, CsvUtils.split("text"));
|
||||||
|
assertArrayEquals(new String[]{" a b "}, CsvUtils.split(" a b "));
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{"", ""}, CsvUtils.split(","));
|
||||||
|
assertArrayEquals(new String[]{"", "", ""}, CsvUtils.split(",,"));
|
||||||
|
assertArrayEquals(new String[]{" ", " "}, CsvUtils.split(" , "));
|
||||||
|
assertArrayEquals(new String[]{" ", " ", " "}, CsvUtils.split(" , , "));
|
||||||
|
assertArrayEquals(new String[]{"a", "b"}, CsvUtils.split("a,b"));
|
||||||
|
assertArrayEquals(new String[]{" a ", " b "}, CsvUtils.split(" a , b "));
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{"text"},
|
||||||
|
CsvUtils.split("\"text\"")); // "text"
|
||||||
|
assertArrayEquals(new String[]{" text "},
|
||||||
|
CsvUtils.split("\" text \"")); // "_text_"
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{""},
|
||||||
|
CsvUtils.split("\"\"")); // ""
|
||||||
|
assertArrayEquals(new String[]{"\""},
|
||||||
|
CsvUtils.split("\"\"\"\"")); // """"
|
||||||
|
assertArrayEquals(new String[]{"", ""},
|
||||||
|
CsvUtils.split("\"\",\"\"")); // "",""
|
||||||
|
assertArrayEquals(new String[]{"\",\""},
|
||||||
|
CsvUtils.split("\"\"\",\"\"\"")); // ""","""
|
||||||
|
assertArrayEquals(new String[]{"\"", "\""},
|
||||||
|
CsvUtils.split("\"\"\"\",\"\"\"\"")); // """",""""
|
||||||
|
assertArrayEquals(new String[]{"\"", "\",\""},
|
||||||
|
CsvUtils.split("\"\"\"\",\"\"\",\"\"\"")); // """",""","""
|
||||||
|
assertArrayEquals(new String[]{"\",\"", "\""},
|
||||||
|
CsvUtils.split("\"\"\",\"\"\",\"\"\"\"")); // """,""",""""
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{" a ", " b , c "},
|
||||||
|
CsvUtils.split(" a ,\" b , c \"")); // _a_,"_b_,_c_"
|
||||||
|
assertArrayEquals(new String[]{" a ", " b , c ", " d "},
|
||||||
|
CsvUtils.split(" a ,\" b , c \", d ")); // _a_,"_b_,_c_",_d_
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSplitException() {
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split(" \"text\" "); // _"text"_
|
||||||
|
fail("Raw quote in text: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split(" \" text \" "); // _"_text_"_
|
||||||
|
fail("Raw quote in text: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in text", success.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split("a,\"b,"); // a,",b
|
||||||
|
fail("Unterminated quote: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split("a,\"\"\",b"); // a,""",b
|
||||||
|
fail("Unterminated quote: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split("a,\"\"\"\"\",b"); // a,""""",b
|
||||||
|
fail("Unterminated quote: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split("a,\"b,c"); // a,"b,c
|
||||||
|
fail("Unterminated quote: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Unterminated quote", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split("a,\",\"b,c"); // a,","b,c
|
||||||
|
fail("Raw quote in quoted text: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in quoted text", success.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final String[] fields = CsvUtils.split("a,\",\"b\",\",c"); // a,","b",",c
|
||||||
|
fail("Raw quote in quoted text: fields=" + Arrays.toString(fields));
|
||||||
|
} catch (final CsvParseException success) {
|
||||||
|
assertEquals("Raw quote in quoted text", success.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSplitWithTrimSpaces() {
|
||||||
|
final int trimSpaces = CsvUtils.SPLIT_FLAGS_TRIM_SPACES;
|
||||||
|
assertArrayEquals(new String[]{""}, CsvUtils.split(trimSpaces, ""));
|
||||||
|
assertArrayEquals(new String[]{""}, CsvUtils.split(trimSpaces, " "));
|
||||||
|
assertArrayEquals(new String[]{"text"}, CsvUtils.split(trimSpaces, "text"));
|
||||||
|
assertArrayEquals(new String[]{"a b"}, CsvUtils.split(trimSpaces, " a b "));
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{"", ""}, CsvUtils.split(trimSpaces, ","));
|
||||||
|
assertArrayEquals(new String[]{"", "", ""}, CsvUtils.split(trimSpaces, ",,"));
|
||||||
|
assertArrayEquals(new String[]{"", ""}, CsvUtils.split(trimSpaces, " , "));
|
||||||
|
assertArrayEquals(new String[]{"", "", ""}, CsvUtils.split(trimSpaces, " , , "));
|
||||||
|
assertArrayEquals(new String[]{"a", "b"}, CsvUtils.split(trimSpaces, "a,b"));
|
||||||
|
assertArrayEquals(new String[]{"a", "b"}, CsvUtils.split(trimSpaces, " a , b "));
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{"text"},
|
||||||
|
CsvUtils.split(trimSpaces, "\"text\"")); // "text"
|
||||||
|
assertArrayEquals(new String[]{"text"},
|
||||||
|
CsvUtils.split(trimSpaces, " \"text\" ")); // _"text"_
|
||||||
|
assertArrayEquals(new String[]{" text "},
|
||||||
|
CsvUtils.split(trimSpaces, "\" text \"")); // "_text_"
|
||||||
|
assertArrayEquals(new String[]{" text "},
|
||||||
|
CsvUtils.split(trimSpaces, " \" text \" ")); // _"_text_"_
|
||||||
|
assertArrayEquals(new String[]{"a", "b"},
|
||||||
|
CsvUtils.split(trimSpaces, " \"a\" , \"b\" ")); // _"a"_,_"b"_
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\" ")); // _""_
|
||||||
|
assertArrayEquals(new String[]{"\""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\"\"\" ")); // _""""_
|
||||||
|
assertArrayEquals(new String[]{"", ""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\" , \"\" ")); // _""_,_""_
|
||||||
|
assertArrayEquals(new String[]{"\" , \""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\"\" , \"\"\" ")); // _"""_,_"""_
|
||||||
|
assertArrayEquals(new String[]{"\"", "\""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\"\"\" , \"\"\"\" ")); // _""""_,_""""_
|
||||||
|
assertArrayEquals(new String[]{"\"", "\" , \""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\"\"\" , \"\"\" , \"\"\" ")); // _""""_,_"""_,_"""_
|
||||||
|
assertArrayEquals(new String[]{"\" , \"", "\""},
|
||||||
|
CsvUtils.split(trimSpaces, " \"\"\" , \"\"\" , \"\"\"\" ")); // _"""_,_"""_,_""""_
|
||||||
|
|
||||||
|
assertArrayEquals(new String[]{"a", " b , c "},
|
||||||
|
CsvUtils.split(trimSpaces, " a , \" b , c \" ")); // _a_,_"_b_,_c_"_
|
||||||
|
assertArrayEquals(new String[]{"a", " b , c ", "d"},
|
||||||
|
CsvUtils.split(trimSpaces, " a, \" b , c \" , d ")); // _a,_"_b_,_c_"_,_d_
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEscape() {
|
||||||
|
assertEquals("", CsvUtils.escapeField("", false));
|
||||||
|
assertEquals("plain", CsvUtils.escapeField("plain", false));
|
||||||
|
assertEquals(" ", CsvUtils.escapeField(" ", false));
|
||||||
|
assertEquals(" ", CsvUtils.escapeField(" ", false));
|
||||||
|
assertEquals("a space", CsvUtils.escapeField("a space", false));
|
||||||
|
assertEquals(" space-at-start", CsvUtils.escapeField(" space-at-start", false));
|
||||||
|
assertEquals("space-at-end ", CsvUtils.escapeField("space-at-end ", false));
|
||||||
|
assertEquals("a lot of spaces", CsvUtils.escapeField("a lot of spaces", false));
|
||||||
|
assertEquals("\",\"", CsvUtils.escapeField(",", false));
|
||||||
|
assertEquals("\",,\"", CsvUtils.escapeField(",,", false));
|
||||||
|
assertEquals("\"a,comma\"", CsvUtils.escapeField("a,comma", false));
|
||||||
|
assertEquals("\",comma-at-begin\"", CsvUtils.escapeField(",comma-at-begin", false));
|
||||||
|
assertEquals("\"comma-at-end,\"", CsvUtils.escapeField("comma-at-end,", false));
|
||||||
|
assertEquals("\",,a,lot,,,of,commas,,\"",
|
||||||
|
CsvUtils.escapeField(",,a,lot,,,of,commas,,", false));
|
||||||
|
assertEquals("\"a comma,and a space\"", CsvUtils.escapeField("a comma,and a space", false));
|
||||||
|
assertEquals("\"\"\"\"", CsvUtils.escapeField("\"", false)); // " -> """"
|
||||||
|
assertEquals("\"\"\"\"\"\"", CsvUtils.escapeField("\"\"", false)); // "" -> """"""
|
||||||
|
assertEquals("\"\"\"\"\"\"\"\"", CsvUtils.escapeField("\"\"\"", false)); // """ -> """"""""
|
||||||
|
assertEquals("\"\"\"text\"\"\"",
|
||||||
|
CsvUtils.escapeField("\"text\"", false)); // "text" -> """text"""
|
||||||
|
assertEquals("\"text has \"\" in middle\"",
|
||||||
|
CsvUtils.escapeField("text has \" in middle", false));
|
||||||
|
assertEquals("\"\"\"quote,at begin\"", CsvUtils.escapeField("\"quote,at begin", false));
|
||||||
|
assertEquals("\"quote at,end\"\"\"", CsvUtils.escapeField("quote at,end\"", false));
|
||||||
|
assertEquals("\"\"\"quote at begin\"", CsvUtils.escapeField("\"quote at begin", false));
|
||||||
|
assertEquals("\"quote at end\"\"\"", CsvUtils.escapeField("quote at end\"", false));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEscapeWithAlwaysQuoted() {
|
||||||
|
assertEquals("\"\"", CsvUtils.escapeField("", true));
|
||||||
|
assertEquals("\"plain\"", CsvUtils.escapeField("plain", true));
|
||||||
|
assertEquals("\" \"", CsvUtils.escapeField(" ", true));
|
||||||
|
assertEquals("\" \"", CsvUtils.escapeField(" ", true));
|
||||||
|
assertEquals("\"a space\"", CsvUtils.escapeField("a space", true));
|
||||||
|
assertEquals("\" space-at-start\"", CsvUtils.escapeField(" space-at-start", true));
|
||||||
|
assertEquals("\"space-at-end \"", CsvUtils.escapeField("space-at-end ", true));
|
||||||
|
assertEquals("\"a lot of spaces\"", CsvUtils.escapeField("a lot of spaces", true));
|
||||||
|
assertEquals("\",\"", CsvUtils.escapeField(",", true));
|
||||||
|
assertEquals("\",,\"", CsvUtils.escapeField(",,", true));
|
||||||
|
assertEquals("\"a,comma\"", CsvUtils.escapeField("a,comma", true));
|
||||||
|
assertEquals("\",comma-at-begin\"", CsvUtils.escapeField(",comma-at-begin", true));
|
||||||
|
assertEquals("\"comma-at-end,\"", CsvUtils.escapeField("comma-at-end,", true));
|
||||||
|
assertEquals("\",,a,lot,,,of,commas,,\"",
|
||||||
|
CsvUtils.escapeField(",,a,lot,,,of,commas,,", true));
|
||||||
|
assertEquals("\"a comma,and a space\"", CsvUtils.escapeField("a comma,and a space", true));
|
||||||
|
assertEquals("\"\"\"\"", CsvUtils.escapeField("\"", true)); // " -> """"
|
||||||
|
assertEquals("\"\"\"\"\"\"", CsvUtils.escapeField("\"\"", true)); // "" -> """"""
|
||||||
|
assertEquals("\"\"\"\"\"\"\"\"", CsvUtils.escapeField("\"\"\"", true)); // """ -> """"""""
|
||||||
|
assertEquals("\"\"\"text\"\"\"",
|
||||||
|
CsvUtils.escapeField("\"text\"", true)); // "text" -> """text"""
|
||||||
|
assertEquals("\"text has \"\" in middle\"",
|
||||||
|
CsvUtils.escapeField("text has \" in middle", true));
|
||||||
|
assertEquals("\"\"\"quote,at begin\"", CsvUtils.escapeField("\"quote,at begin", true));
|
||||||
|
assertEquals("\"quote at,end\"\"\"", CsvUtils.escapeField("quote at,end\"", true));
|
||||||
|
assertEquals("\"\"\"quote at begin\"", CsvUtils.escapeField("\"quote at begin", true));
|
||||||
|
assertEquals("\"quote at end\"\"\"", CsvUtils.escapeField("quote at end\"", true));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testJoinWithoutColumnPositions() {
|
||||||
|
assertEquals("", CsvUtils.join());
|
||||||
|
assertEquals("", CsvUtils.join(""));
|
||||||
|
assertEquals(",", CsvUtils.join("", ""));
|
||||||
|
|
||||||
|
assertEquals("text, text,text ",
|
||||||
|
CsvUtils.join("text", " text", "text "));
|
||||||
|
assertEquals("\"\"\"\",\"\"\"\"\"\",\"\"\"text\"\"\"",
|
||||||
|
CsvUtils.join("\"", "\"\"", "\"text\""));
|
||||||
|
assertEquals("a b,\"c,d\",\"e\"\"f\"",
|
||||||
|
CsvUtils.join("a b", "c,d", "e\"f"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testJoinWithoutColumnPositionsWithExtraSpace() {
|
||||||
|
final int extraSpace = CsvUtils.JOIN_FLAGS_EXTRA_SPACE;
|
||||||
|
assertEquals("", CsvUtils.join(extraSpace));
|
||||||
|
assertEquals("", CsvUtils.join(extraSpace, ""));
|
||||||
|
assertEquals(", ", CsvUtils.join(extraSpace, "", ""));
|
||||||
|
|
||||||
|
assertEquals("text, text, text ",
|
||||||
|
CsvUtils.join(extraSpace, "text", " text", "text "));
|
||||||
|
// ","","text" -> """","""""","""text"""
|
||||||
|
assertEquals("\"\"\"\", \"\"\"\"\"\", \"\"\"text\"\"\"",
|
||||||
|
CsvUtils.join(extraSpace, "\"", "\"\"", "\"text\""));
|
||||||
|
assertEquals("a b, \"c,d\", \"e\"\"f\"",
|
||||||
|
CsvUtils.join(extraSpace, "a b", "c,d", "e\"f"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testJoinWithoutColumnPositionsWithExtraSpaceAndAlwaysQuoted() {
|
||||||
|
final int extrSpaceAndQuoted =
|
||||||
|
CsvUtils.JOIN_FLAGS_EXTRA_SPACE | CsvUtils.JOIN_FLAGS_ALWAYS_QUOTED;
|
||||||
|
assertEquals("", CsvUtils.join(extrSpaceAndQuoted));
|
||||||
|
assertEquals("\"\"", CsvUtils.join(extrSpaceAndQuoted, ""));
|
||||||
|
assertEquals("\"\", \"\"", CsvUtils.join(extrSpaceAndQuoted, "", ""));
|
||||||
|
|
||||||
|
assertEquals("\"text\", \" text\", \"text \"",
|
||||||
|
CsvUtils.join(extrSpaceAndQuoted, "text", " text", "text "));
|
||||||
|
// ","","text" -> """", """""", """text"""
|
||||||
|
assertEquals("\"\"\"\", \"\"\"\"\"\", \"\"\"text\"\"\"",
|
||||||
|
CsvUtils.join(extrSpaceAndQuoted, "\"", "\"\"", "\"text\""));
|
||||||
|
assertEquals("\"a b\", \"c,d\", \"e\"\"f\"",
|
||||||
|
CsvUtils.join(extrSpaceAndQuoted, "a b", "c,d", "e\"f"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testJoinWithColumnPositions() {
|
||||||
|
final int noFlags = CsvUtils.JOIN_FLAGS_NONE;
|
||||||
|
assertEquals("", CsvUtils.join(noFlags, new int[]{}));
|
||||||
|
assertEquals(" ", CsvUtils.join(noFlags, new int[]{3}, ""));
|
||||||
|
assertEquals(" ,", CsvUtils.join(noFlags, new int[]{1}, "", ""));
|
||||||
|
assertEquals(", ", CsvUtils.join(noFlags, new int[]{0, 3}, "", ""));
|
||||||
|
|
||||||
|
assertEquals("text, text, text ",
|
||||||
|
CsvUtils.join(noFlags, new int[]{0, 8, 15}, "text", " text", "text "));
|
||||||
|
// ","","text" -> """", """""","""text"""
|
||||||
|
assertEquals("\"\"\"\", \"\"\"\"\"\",\"\"\"text\"\"\"",
|
||||||
|
CsvUtils.join(noFlags, new int[]{0, 8, 15}, "\"", "\"\"", "\"text\""));
|
||||||
|
assertEquals("a b, \"c,d\", \"e\"\"f\"",
|
||||||
|
CsvUtils.join(noFlags, new int[]{0, 8, 15}, "a b", "c,d", "e\"f"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testJoinWithColumnPositionsWithExtraSpace() {
|
||||||
|
final int extraSpace = CsvUtils.JOIN_FLAGS_EXTRA_SPACE;
|
||||||
|
assertEquals("", CsvUtils.join(extraSpace, new int[]{}));
|
||||||
|
assertEquals(" ", CsvUtils.join(extraSpace, new int[]{3}, ""));
|
||||||
|
assertEquals(" , ", CsvUtils.join(extraSpace, new int[]{1}, "", ""));
|
||||||
|
assertEquals(", ", CsvUtils.join(extraSpace, new int[]{0, 3}, "", ""));
|
||||||
|
|
||||||
|
assertEquals("text, text, text ",
|
||||||
|
CsvUtils.join(extraSpace, new int[]{0, 8, 15}, "text", " text", "text "));
|
||||||
|
// ","","text" -> """", """""", """text"""
|
||||||
|
assertEquals("\"\"\"\", \"\"\"\"\"\", \"\"\"text\"\"\"",
|
||||||
|
CsvUtils.join(extraSpace, new int[]{0, 8, 15}, "\"", "\"\"", "\"text\""));
|
||||||
|
assertEquals("a b, \"c,d\", \"e\"\"f\"",
|
||||||
|
CsvUtils.join(extraSpace, new int[]{0, 8, 15}, "a b", "c,d", "e\"f"));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue