From 7a66d24c814d1913b5454cad3b52036a9f206c0b Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Sun, 11 Jan 2026 16:37:21 -0500 Subject: [PATCH 1/3] fix: DH-20645: Support CSV escaping --- src/main/java/io/deephaven/csv/CsvSpecs.java | 40 +++++- .../io/deephaven/csv/reading/CsvReader.java | 8 +- .../reading/cells/DelimitedCellGrabber.java | 70 ++++++++- .../csv/reading/cells/FixedCellGrabber.java | 2 +- .../java/io/deephaven/csv/EscapeTest.java | 136 ++++++++++++++++++ 5 files changed, 249 insertions(+), 7 deletions(-) create mode 100644 src/test/java/io/deephaven/csv/EscapeTest.java diff --git a/src/main/java/io/deephaven/csv/CsvSpecs.java b/src/main/java/io/deephaven/csv/CsvSpecs.java index fdb4ed66..21eee472 100644 --- a/src/main/java/io/deephaven/csv/CsvSpecs.java +++ b/src/main/java/io/deephaven/csv/CsvSpecs.java @@ -319,7 +319,7 @@ default Builder putParserForIndex(int index, Parser parser) { * 7-bit ASCII. The default is '{@value #defaultQuote}' For example: * *
-         * 123,"hello, there",456,
+         * 123,"hello, there",456
          * 
* * Would be read as the three fields: @@ -337,6 +337,29 @@ default Builder putParserForIndex(int index, Parser parser) { */ Builder quote(char quote); + /** + * The escape character (used when you want field or line delimiters to be interpreted as literal text, or you + * want to add C-style control characters like \b, \t, \n, \r, \f). Typically set to the backslash character + * ('\'). Must be 7-bit ASCII. The default is null, interpreted as unset. For example, with the escape character + * set to '\': + * + *
+         * 123,hello\, there\n,456
+         * 
+ * + * Would be read as the three fields: + * + * + * + * @param escape The escape property. + * @return self after modifying the escape property. + */ + Builder escape(Character escape); + /** * Whether to trim leading and trailing blanks from non-quoted values. The default is {@code true}. * @@ -399,6 +422,9 @@ void check() { final List problems = new ArrayList<>(); check7BitAscii("quote", quote(), problems); check7BitAscii("delimiter", delimiter(), problems); + if (escape() != null) { + check7BitAscii("escape", escape(), problems); + } checkNonnegative("skipRows", skipRows(), problems); checkNonnegative("skipHeaderRows", skipHeaderRows(), problems); checkNonnegative("numRows", numRows(), problems); @@ -699,7 +725,6 @@ public char delimiter() { return defaultDelimiter; } - private static final char defaultQuote = '"'; /** @@ -712,6 +737,17 @@ public char quote() { return defaultQuote; } + /** + * See {@link Builder#escape}. + * + * @return The caller-specified escape character, or null if none. + */ + @Default + @Nullable + public Character escape() { + return null; + } + /** * See {@link Builder#ignoreSurroundingSpaces}. * diff --git a/src/main/java/io/deephaven/csv/reading/CsvReader.java b/src/main/java/io/deephaven/csv/reading/CsvReader.java index 04610758..56560881 100644 --- a/src/main/java/io/deephaven/csv/reading/CsvReader.java +++ b/src/main/java/io/deephaven/csv/reading/CsvReader.java @@ -116,11 +116,15 @@ private static boolean needsUtf8Encoding(final Charset charset) { private static Result delimitedReadLogic( final CsvSpecs specs, final InputStream stream, final SinkFactory sinkFactory) throws CsvReaderException { - // These two have already been validated by CsvSpecs to be 7-bit ASCII. + final byte IllegalUtf8 = (byte) 0xff; + + // These three have already been validated by CsvSpecs to be 7-bit ASCII. 
final byte quoteAsByte = (byte) specs.quote(); final byte delimiterAsByte = (byte) specs.delimiter(); + final byte escapeCharAsByte = specs.escape() == null ? IllegalUtf8 : (byte) specs.escape().charValue(); final CellGrabber grabber = - new DelimitedCellGrabber(stream, quoteAsByte, delimiterAsByte, specs.ignoreSurroundingSpaces(), + new DelimitedCellGrabber(stream, quoteAsByte, escapeCharAsByte, delimiterAsByte, + specs.ignoreSurroundingSpaces(), specs.trim()); // For an "out" parameter final MutableObject firstDataRowHolder = new MutableObject<>(); diff --git a/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java index d235f2da..2110a13c 100644 --- a/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java +++ b/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java @@ -21,6 +21,11 @@ public final class DelimitedCellGrabber implements CellGrabber { private final InputStream inputStream; /** The configured CSV quote character (typically '"'). Must be 7-bit ASCII. */ private final byte quoteChar; + /** + * The configured CVS escape character. Must be 7-bit ASCII. If configured to null in CsvSpecs, we set it to the + * illegal UTF-8 byte 0xff so it has no effect. + */ + private final byte escapeChar; /** The configured CVS field delimiter (typically ','). Must be 7-bit ASCII. */ private final byte fieldDelimiter; /** Whether to trim leading and trailing blanks from non-quoted values. */ @@ -40,7 +45,8 @@ public final class DelimitedCellGrabber implements CellGrabber { * buffer[] array. But we can't do that when the input cell spans more than one buffer[] chunk, or when the input * cell does not exactly represent the output. This latter case can happen for example when an escaped quote ("") * needs to be returned as a single quotation mark ("). 
So if our input is hello""there, then we can't directly - * return a slice of the input array, because actually we need hello"there (one quotation mark, not two). + * return a slice of the input array, because actually we need hello"there (one quotation mark, not two). Another + * case where this can happen is when the escape character is enabled and we encounter an escape like \, or \n. */ private final GrowableByteBuffer spillBuffer; /** @@ -56,6 +62,8 @@ public final class DelimitedCellGrabber implements CellGrabber { * * @param inputStream The input, represented as UTF-8 bytes. * @param quoteChar The configured quote char. Typically " + * @param escapeChar The configured escape char. Defaults to our representation of 'none' but if the feature is + * desired, is typically set to \ * @param fieldDelimiter The configured field delimiter. Typically , * @param ignoreSurroundingSpaces Whether to ignore surrounding spaces * @param trim Whether to trim spaces inside quoted values. @@ -63,11 +71,13 @@ public final class DelimitedCellGrabber implements CellGrabber { public DelimitedCellGrabber( final InputStream inputStream, final byte quoteChar, + final byte escapeChar, final byte fieldDelimiter, final boolean ignoreSurroundingSpaces, final boolean trim) { this.inputStream = inputStream; this.quoteChar = quoteChar; + this.escapeChar = escapeChar; this.fieldDelimiter = fieldDelimiter; this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; this.trim = trim; @@ -131,10 +141,16 @@ private void processQuotedMode(final ByteSlice dest, final MutableBoolean lastIn } prevCharWasCarriageReturn = false; } - if (ch != quoteChar) { + if (ch != quoteChar && ch != escapeChar) { // Ordinary character. Note: in quoted mode we will gladly eat field and line separators. continue; } + + if (ch == escapeChar) { + processEscapeChar(); + continue; + } + // This character is a quote char. It could be the end of the cell, or it could be an escaped // quote char (e.g. ""). 
The way to tell is to peek ahead at the next character. if (!tryEnsureMore()) { @@ -264,10 +280,60 @@ private void finishField(final ByteSlice dest, final MutableBoolean lastInRow, ++physicalRowNum; return; } + if (ch == escapeChar) { + ++offset; + processEscapeChar(); + continue; + } + ++offset; } } + private void processEscapeChar() throws CsvReaderException { + // Spill data up to and including the escape character into the spill buffer. + // Below, we will replace the escape character with the transformed escaped character. + spillRange(); + + // This character is an escape character. In practice, it is used to either to make the next + // metacharacter like the quote or field separator normal, or to provide a C-style special character like + // newline or tab. + // However, it can't appear as the last character of the input. + if (!tryEnsureMore()) { + throw new CsvReaderException( + "The last character in the input was the escape character. This is not allowed. The escape character needs to be followed by another character."); + } + + // Consume the next char (the escaped character). Potentially transform it if it is one of the C escapes: + // characters b, t, n etc + final byte nextChar = buffer[offset++]; + final byte nextCharTransformed = transformEscapedChar(nextChar); + + // Replace the placeholder character with the transformed character + spillBuffer.data()[spillBuffer.size() - 1] = nextCharTransformed; + + // Advance the spill buffer's notion of "next start position" so it skips the escaped character. + startOffset = offset; + } + + private static byte transformEscapedChar(byte nextChar) { + // Feeling some "Reflections on Trusting Trust" realness. + switch (nextChar) { + case 'b': + return '\b'; + case 't': + return '\t'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 'f': + return '\f'; + default: + return nextChar; + } + } + /** @return true if there are more characters. 
*/ private boolean tryEnsureMore() throws CsvReaderException { if (offset != size) { diff --git a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java index 5538f21a..1bf4fbfc 100644 --- a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java +++ b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java @@ -23,7 +23,7 @@ public class FixedCellGrabber implements CellGrabber { */ public static CellGrabber makeLineGrabber(InputStream stream) { final byte IllegalUtf8 = (byte) 0xff; - return new DelimitedCellGrabber(stream, IllegalUtf8, IllegalUtf8, true, false); + return new DelimitedCellGrabber(stream, IllegalUtf8, IllegalUtf8, IllegalUtf8, true, false); } private final CellGrabber lineGrabber; diff --git a/src/test/java/io/deephaven/csv/EscapeTest.java b/src/test/java/io/deephaven/csv/EscapeTest.java new file mode 100644 index 00000000..f678b790 --- /dev/null +++ b/src/test/java/io/deephaven/csv/EscapeTest.java @@ -0,0 +1,136 @@ +package io.deephaven.csv; + +import io.deephaven.csv.parsers.Parser; +import io.deephaven.csv.reading.CsvReader; +import io.deephaven.csv.testutil.*; +import io.deephaven.csv.util.CsvReaderException; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.InputStream; +import java.lang.reflect.Array; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.stream.Stream; + +public class EscapeTest { + @ParameterizedTest + @MethodSource("provideTuplesForEscapeTest") + public void escapeTest(String input, Character escape, Object[] expectedValues) throws CsvReaderException { + final Charset charset = StandardCharsets.UTF_8; + final CsvSpecs specs = 
CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).escape(escape).build(); + final InputStream stream = CsvTestUtil.toInputStream(input, charset); + CsvReader.Result result = CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory()); + + Assertions.assertThat(result.numRows()).isEqualTo(1); + Assertions.assertThat(result.numCols()).isEqualTo(expectedValues.length); + + for (int i = 0; i != result.numCols(); ++i) { + final Object array = result.columns()[i].data(); + final Object element0 = Array.get(array, 0); + Assertions.assertThat(element0).isEqualTo(expectedValues[i]); + } + } + + private static Stream provideTuplesForEscapeTest() { + return Stream.of( + // Cases that are not surrounded with quotes + + // Deep\,haven,42 with no escape configured comes through as "Deep\", "haven", 42 + Arguments.of("Deep\\,haven,42\n", null, new Object[] {"Deep\\", "haven", 42}), + // Deep\,haven,42 with escape configured as \ comes through as "Deep,haven", 42 + Arguments.of("Deep\\,haven,42\n", '\\', new Object[] {"Deep,haven", 42}), + // Deephave\n,42 with no escape configured comes through as "Deephave\n", 42 with the literal backslash + Arguments.of("Deephave\\n,42\n", '*', new Object[] {"Deephave\\n", 42}), + // Deephave\n,42 with escape configured as \ comes through as "Deephave\n", with \n being newline + Arguments.of("Deephave\\n,42\n", '\\', new Object[] {"Deephave\n", 42}), + // Deep*,haven,42 with escape configured as * comes through as "Deep,haven", 42 + Arguments.of("Deep*,haven,42\n", '*', new Object[] {"Deep,haven", 42}), + + // Cases that are surrounded with quotes + + // "Deep,haven",42 with no escape configured comes through as "Deep,haven", 42 + // because quotation marks are another way to escape the field separator. 
+ Arguments.of("\"Deep,haven\",42\n", null, new Object[] {"Deep,haven", 42}), + // "Deep\,haven",42 with escape configured as \ also comes through as "Deep,haven", 42 + // because the backslash escape is processed even inside quotes. + Arguments.of("\"Deep\\,haven\",42\n", '\\', new Object[] {"Deep,haven", 42}), + // "Deephave\n",42 with no escape configured comes through as "Deephave\n", 42 with the literal + // backslash + Arguments.of("\"Deephave\\n\",42\n", '*', new Object[] {"Deephave\\n", 42}), + // Deephave\n,42 with escape configured as \ comes through as "Deephave\n", with \n being newline + Arguments.of("\"Deephave\\n\",42\n", '\\', new Object[] {"Deephave\n", 42}), + // "Deep*,haven,42" with escape configured as * comes through as "Deep,haven", 42 + Arguments.of("\"Deep*,haven\",42\n", '*', new Object[] {"Deep,haven", 42}), + + // C style escapes + + // Without escape configured, C-style escapes are not special + Arguments.of("Deep\\b\\r\\n\\t\\fhaven,42\n", null, new Object[] {"Deep\\b\\r\\n\\t\\fhaven", 42}), + // With escape configured, C-style escapes are special + Arguments.of("Deep\\b\\r\\n\\t\\fhaven,42\n", '\\', new Object[] {"Deep\b\r\n\t\fhaven", 42}), + // Surrounding with quotes, without escape configured, C-style escapes are not special + Arguments.of("\"Deep\\b\\r\\n\\t\\fhaven\",42\n", null, new Object[] {"Deep\\b\\r\\n\\t\\fhaven", 42}), + // Surrounding with quotes, with escape configured, C-style escapes are special + Arguments.of("\"Deep\\b\\r\\n\\t\\fhaven\",42\n", '\\', new Object[] {"Deep\b\r\n\t\fhaven", 42}), + + // Quotation mark in the middle of unquoted text + + // Without escape configured: a quotation mark in the middle is passed through + Arguments.of("Deep\"haven,42\n", null, new Object[] {"Deep\"haven", 42}), + // With escape configured: a quotation mark in the middle is still passed through (not that interesting) + Arguments.of("Deep\"haven,42\n", '\\', new Object[] {"Deep\"haven", 42}), + // Without escape configured: an 
escaped quotation mark in the middle just passes through the \ and the + // " + Arguments.of("Deep\\\"haven,42\n", null, new Object[] {"Deep\\\"haven", 42}), + // With escape configured: an escaped quotation mark in the middle passes through the " + Arguments.of("Deep\\\"haven,42\n", '\\', new Object[] {"Deep\"haven", 42}), + + // Getting a quotation mark in the middle of quoted text + + // Without escape configured: a double quotation mark in the middle is passed through as a single " + Arguments.of("\"Deep\"\"haven\",42\n", null, new Object[] {"Deep\"haven", 42}), + // With escape configured: a double quotation mark in the middle is still passed through (same as above) + Arguments.of("\"Deep\"\"haven\",42\n", '\\', new Object[] {"Deep\"haven", 42}), + // With escape configured: a single escaped quotation mark in the middle passes through the " + Arguments.of("\"Deep\\\"haven\",42\n", '\\', new Object[] {"Deep\"haven", 42})); + } + + @ParameterizedTest + @MethodSource("provideTuplesForQuoteTest") + public void choiceOfQuoteTest(String input, char quote, Object[] expectedValues) throws CsvReaderException { + final Charset charset = StandardCharsets.UTF_8; + final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).quote(quote).escape('\\').build(); + final InputStream stream = CsvTestUtil.toInputStream(input, charset); + CsvReader.Result result = CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory()); + + Assertions.assertThat(result.numRows()).isEqualTo(1); + Assertions.assertThat(result.numCols()).isEqualTo(expectedValues.length); + + for (int i = 0; i != result.numCols(); ++i) { + final Object array = result.columns()[i].data(); + final Object element0 = Array.get(array, 0); + Assertions.assertThat(element0).isEqualTo(expectedValues[i]); + } + } + + private static Stream provideTuplesForQuoteTest() { + return Stream.of( + // \"Deep\"haven\",42 comes through as Deep*haven, 42 + Arguments.of("\"Deep\\\"haven\",42\n", '"', new 
Object[] {"Deep\"haven", 42}), + // *Deep\*haven*,42 comes through as Deep*haven, 42 + Arguments.of("*Deep\\*haven*,42\n", '*', new Object[] {"Deep*haven", 42}), + // nDeephave\nn,42 comes through as Deephave\n, 42 (where \n is the newline) + // This demonstrates the fanciful example that you *can* use 'n' as a quote character, but + // you should know that \n will translate to newline, not escape your quote character. + Arguments.of("nDeephave\\nn,42\n", 'n', new Object[] {"Deephave\n", 42}), + // nDeephavennn,42 comes through as Deephaven, 42. + // Following up to the above, this shows if you use 'n' as a quote character + // and you want it in your data, you need to double it. + Arguments.of("nDeephavennn,42\n", 'n', new Object[] {"Deephaven", 42})); + } +} From 85942dc23588c4f96f26d171123cd9659070818d Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Thu, 15 Jan 2026 15:54:01 -0500 Subject: [PATCH 2/3] Respond to review feedback --- src/main/java/io/deephaven/csv/CsvSpecs.java | 6 +- .../reading/cells/DelimitedCellGrabber.java | 22 +++- .../java/io/deephaven/csv/EscapeTest.java | 115 +++++++++++------- 3 files changed, 90 insertions(+), 53 deletions(-) diff --git a/src/main/java/io/deephaven/csv/CsvSpecs.java b/src/main/java/io/deephaven/csv/CsvSpecs.java index 21eee472..a11718a7 100644 --- a/src/main/java/io/deephaven/csv/CsvSpecs.java +++ b/src/main/java/io/deephaven/csv/CsvSpecs.java @@ -339,9 +339,9 @@ default Builder putParserForIndex(int index, Parser parser) { /** * The escape character (used when you want field or line delimiters to be interpreted as literal text, or you - * want to add C-style control characters like \b, \t, \n, \r, \f). Typically set to the backslash character - * ('\'). Must be 7-bit ASCII. The default is null, interpreted as unset. For example, with the escape character - * set to '\': + * want to add the Java-style escape sequences \b, \t, \n, \r, or \f. Typically set to the backslash character + * ('\'). Must be 7-bit ASCII. 
We do not decode Java octal or Unicode escape sequences 0xx or uxxxx. The default + * is null, interpreted as unset. For example, with the escape character set to '\': * *
          * 123,hello\, there\n,456
diff --git a/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java
index 2110a13c..7c2857f6 100644
--- a/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java
+++ b/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java
@@ -300,8 +300,7 @@ private void processEscapeChar() throws CsvReaderException {
         // newline or tab.
         // However, it can't appear as the last character of the input.
         if (!tryEnsureMore()) {
-            throw new CsvReaderException(
-                    "The last character in the input was the escape character. This is not allowed. The escape character needs to be followed by another character.");
+            throw new CsvReaderException("The escape character cannot be the last character of the input");
         }
 
         // Consume the next char (the escaped character). Potentially transform it if it is one of the C escapes:
@@ -316,8 +315,23 @@ private void processEscapeChar() throws CsvReaderException {
         startOffset = offset;
     }
 
-    private static byte transformEscapedChar(byte nextChar) {
-        // Feeling some "Reflections on Trusting Trust" realness.
+
+    /**
+     * Interpret the set of character escapes supported by Java. We do not currently interpret octal 0xx or Unicode
+     * escape sequences uxxxx.
+     * 
+     * @param nextChar The character following the escape character.
+     * @return If one of (b, t, n, r, f), that value transformed to (\b, \t, \n, \r, \f). Otherwise, the value is
+     *         returned unchanged.
+     * @throws CsvReaderException if passed a non-ASCII character, carriage return, or newline.
+     */
+    private static byte transformEscapedChar(byte nextChar) throws CsvReaderException {
+        if (nextChar < 0) {
+            throw new CsvReaderException("Can't escape a non-ASCII character");
+        }
+        if (nextChar == '\r' || nextChar == '\n') {
+            throw new CsvReaderException("Can't escape a carriage return or newline");
+        }
         switch (nextChar) {
             case 'b':
                 return '\b';
diff --git a/src/test/java/io/deephaven/csv/EscapeTest.java b/src/test/java/io/deephaven/csv/EscapeTest.java
index f678b790..e4cf4fc2 100644
--- a/src/test/java/io/deephaven/csv/EscapeTest.java
+++ b/src/test/java/io/deephaven/csv/EscapeTest.java
@@ -9,6 +9,7 @@
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.io.InputStream;
 import java.lang.reflect.Array;
@@ -22,7 +23,7 @@ public class EscapeTest {
     @MethodSource("provideTuplesForEscapeTest")
     public void escapeTest(String input, Character escape, Object[] expectedValues) throws CsvReaderException {
         final Charset charset = StandardCharsets.UTF_8;
-        final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).escape(escape).build();
+        final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).quote('`').escape(escape).build();
         final InputStream stream = CsvTestUtil.toInputStream(input, charset);
         CsvReader.Result result = CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory());
 
@@ -37,74 +38,71 @@ public void escapeTest(String input, Character escape, Object[] expectedValues)
     }
 
     private static Stream provideTuplesForEscapeTest() {
+        // Note for Java readability we use unusual characters for quote and escape.
+        // Namely: quote is ` (backtick) and escape is | (vertical bar).
         return Stream.of(
                 // Cases that are not surrounded with quotes
 
-                // Deep\,haven,42 with no escape configured comes through as "Deep\", "haven", 42
-                Arguments.of("Deep\\,haven,42\n", null, new Object[] {"Deep\\", "haven", 42}),
-                // Deep\,haven,42 with escape configured as \ comes through as "Deep,haven", 42
-                Arguments.of("Deep\\,haven,42\n", '\\', new Object[] {"Deep,haven", 42}),
-                // Deephave\n,42 with no escape configured comes through as "Deephave\n", 42 with the literal backslash
-                Arguments.of("Deephave\\n,42\n", '*', new Object[] {"Deephave\\n", 42}),
-                // Deephave\n,42 with escape configured as \ comes through as "Deephave\n", with \n being newline
-                Arguments.of("Deephave\\n,42\n", '\\', new Object[] {"Deephave\n", 42}),
-                // Deep*,haven,42 with escape configured as * comes through as "Deep,haven", 42
-                Arguments.of("Deep*,haven,42\n", '*', new Object[] {"Deep,haven", 42}),
+                // Deep|,haven,42 with no escape configured comes through as "Deep|", "haven", 42
+                Arguments.of("Deep|,haven,42\n", null, new Object[] {"Deep|", "haven", 42}),
+                // Deep|,haven,42 with escape configured as | comes through as "Deep,haven", 42
+                Arguments.of("Deep|,haven,42\n", '|', new Object[] {"Deep,haven", 42}),
+                // Deephave|n,42 with no escape configured comes through as "Deephave|n", 42
+                Arguments.of("Deephave|n,42\n", null, new Object[] {"Deephave|n", 42}),
+                // Deephave|n,42 with escape configured as | comes through as "Deephave\n", with \n being newline
+                Arguments.of("Deephave|n,42\n", '|', new Object[] {"Deephave\n", 42}),
 
                 // Cases that are surrounded with quotes
 
-                // "Deep,haven",42 with no escape configured comes through as "Deep,haven", 42
+                // `Deep,haven`,42 with no escape configured comes through as "Deep,haven", 42
                 // because quotation marks are another way to escape the field separator.
-                Arguments.of("\"Deep,haven\",42\n", null, new Object[] {"Deep,haven", 42}),
-                // "Deep\,haven",42 with escape configured as \ also comes through as "Deep,haven", 42
-                // because the backslash escape is processed even inside quotes.
-                Arguments.of("\"Deep\\,haven\",42\n", '\\', new Object[] {"Deep,haven", 42}),
-                // "Deephave\n",42 with no escape configured comes through as "Deephave\n", 42 with the literal
-                // backslash
-                Arguments.of("\"Deephave\\n\",42\n", '*', new Object[] {"Deephave\\n", 42}),
-                // Deephave\n,42 with escape configured as \ comes through as "Deephave\n", with \n being newline
-                Arguments.of("\"Deephave\\n\",42\n", '\\', new Object[] {"Deephave\n", 42}),
-                // "Deep*,haven,42" with escape configured as * comes through as "Deep,haven", 42
-                Arguments.of("\"Deep*,haven\",42\n", '*', new Object[] {"Deep,haven", 42}),
+                Arguments.of("`Deep,haven`,42\n", null, new Object[] {"Deep,haven", 42}),
+                // `Deep|,haven`,42 with escape configured as | also comes through as "Deep,haven", 42
+                // because the escape is processed even inside quotes.
+                Arguments.of("`Deep|,haven`,42\n", '|', new Object[] {"Deep,haven", 42}),
+                // `Deephave|n`,42 with escape configured as * (not |) comes through as "Deephave|n", 42
+                Arguments.of("`Deephave|n`,42\n", '*', new Object[] {"Deephave|n", 42}),
+                // `Deephave|n`,42 with escape configured as | comes through as "Deephave\n", 42 with \n being newline
+                Arguments.of("`Deephave|n`,42\n", '|', new Object[] {"Deephave\n", 42}),
 
                 // C style escapes
 
                 // Without escape configured, C-style escapes are not special
-                Arguments.of("Deep\\b\\r\\n\\t\\fhaven,42\n", null, new Object[] {"Deep\\b\\r\\n\\t\\fhaven", 42}),
+                Arguments.of("Deep|b|r|n|t|fhaven,42\n", null, new Object[] {"Deep|b|r|n|t|fhaven", 42}),
                 // With escape configured, C-style escapes are special
-                Arguments.of("Deep\\b\\r\\n\\t\\fhaven,42\n", '\\', new Object[] {"Deep\b\r\n\t\fhaven", 42}),
+                Arguments.of("Deep|b|r|n|t|fhaven,42\n", '|', new Object[] {"Deep\b\r\n\t\fhaven", 42}),
                 // Surrounding with quotes, without escape configured, C-style escapes are not special
-                Arguments.of("\"Deep\\b\\r\\n\\t\\fhaven\",42\n", null, new Object[] {"Deep\\b\\r\\n\\t\\fhaven", 42}),
+                Arguments.of("`Deep|b|r|n|t|fhaven`,42\n", null, new Object[] {"Deep|b|r|n|t|fhaven", 42}),
                 // Surrounding with quotes, with escape configured, C-style escapes are special
-                Arguments.of("\"Deep\\b\\r\\n\\t\\fhaven\",42\n", '\\', new Object[] {"Deep\b\r\n\t\fhaven", 42}),
+                Arguments.of("`Deep|b|r|n|t|fhaven`,42\n", '|', new Object[] {"Deep\b\r\n\t\fhaven", 42}),
 
                 // Quotation mark in the middle of unquoted text
 
                 // Without escape configured: a quotation mark in the middle is passed through
-                Arguments.of("Deep\"haven,42\n", null, new Object[] {"Deep\"haven", 42}),
+                Arguments.of("Deep`haven,42\n", null, new Object[] {"Deep`haven", 42}),
                 // With escape configured: a quotation mark in the middle is still passed through (not that interesting)
-                Arguments.of("Deep\"haven,42\n", '\\', new Object[] {"Deep\"haven", 42}),
-                // Without escape configured: an escaped quotation mark in the middle just passes through the \ and the
-                // "
-                Arguments.of("Deep\\\"haven,42\n", null, new Object[] {"Deep\\\"haven", 42}),
-                // With escape configured: an escaped quotation mark in the middle passes through the "
-                Arguments.of("Deep\\\"haven,42\n", '\\', new Object[] {"Deep\"haven", 42}),
+                Arguments.of("Deep`haven,42\n", '|', new Object[] {"Deep`haven", 42}),
+                // Without escape configured: an escaped quotation mark in the middle just passes through the | and the
+                // `
+                Arguments.of("Deep|`haven,42\n", null, new Object[] {"Deep|`haven", 42}),
+                // With escape configured: an escaped quotation mark in the middle passes through the `
+                Arguments.of("Deep|`haven,42\n", '|', new Object[] {"Deep`haven", 42}),
 
                 // Getting a quotation mark in the middle of quoted text
 
-                // Without escape configured: a double quotation mark in the middle is passed through as a single "
-                Arguments.of("\"Deep\"\"haven\",42\n", null, new Object[] {"Deep\"haven", 42}),
+                // Without escape configured: a double quotation mark in the middle is passed through as a single quote
+                Arguments.of("`Deep``haven`,42\n", null, new Object[] {"Deep`haven", 42}),
                 // With escape configured: a double quotation mark in the middle is still passed through (same as above)
-                Arguments.of("\"Deep\"\"haven\",42\n", '\\', new Object[] {"Deep\"haven", 42}),
-                // With escape configured: a single escaped quotation mark in the middle passes through the "
-                Arguments.of("\"Deep\\\"haven\",42\n", '\\', new Object[] {"Deep\"haven", 42}));
+                Arguments.of("`Deep``haven`,42\n", '|', new Object[] {"Deep`haven", 42}),
+                // With escape configured: a single escaped quotation mark in the middle passes through the quote
+                Arguments.of("`Deep|`haven`,42\n", '|', new Object[] {"Deep`haven", 42}));
     }
 
     @ParameterizedTest
     @MethodSource("provideTuplesForQuoteTest")
     public void choiceOfQuoteTest(String input, char quote, Object[] expectedValues) throws CsvReaderException {
         final Charset charset = StandardCharsets.UTF_8;
-        final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).quote(quote).escape('\\').build();
+        final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).quote(quote).escape('|').build();
         final InputStream stream = CsvTestUtil.toInputStream(input, charset);
         CsvReader.Result result = CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory());
 
@@ -120,17 +118,42 @@ public void choiceOfQuoteTest(String input, char quote, Object[] expectedValues)
 
     private static Stream provideTuplesForQuoteTest() {
         return Stream.of(
-                // \"Deep\"haven\",42 comes through as Deep*haven, 42
-                Arguments.of("\"Deep\\\"haven\",42\n", '"', new Object[] {"Deep\"haven", 42}),
-                // *Deep\*haven*,42 comes through as Deep*haven, 42
-                Arguments.of("*Deep\\*haven*,42\n", '*', new Object[] {"Deep*haven", 42}),
-                // nDeephave\nn,42 comes through as Deephave\n, 42 (where \n is the newline)
+                // `Deep|`haven`,42 comes through as Deep`haven, 42
+                Arguments.of("`Deep|`haven`,42\n", '`', new Object[] {"Deep`haven", 42}),
+                // *Deep|*haven*,42 comes through as Deep*haven, 42
+                Arguments.of("*Deep|*haven*,42\n", '*', new Object[] {"Deep*haven", 42}),
+                // nDeephave|nn,42 comes through as Deephave\n, 42 (where \n is the newline).
                 // This demonstrates the fanciful example that you *can* use 'n' as a quote character, but
                 // you should know that \n will translate to newline, not escape your quote character.
-                Arguments.of("nDeephave\\nn,42\n", 'n', new Object[] {"Deephave\n", 42}),
+                Arguments.of("nDeephave|nn,42\n", 'n', new Object[] {"Deephave\n", 42}),
                 // nDeephavennn,42 comes through as Deephaven, 42.
                 // Following up to the above, this shows if you use 'n' as a quote character
                 // and you want it in your data, you need to double it.
                 Arguments.of("nDeephavennn,42\n", 'n', new Object[] {"Deephaven", 42}));
     }
+
+    @ParameterizedTest
+    @MethodSource("provideTuplesForErroneousUseOfEscapeTest")
+    public void erroneousUseOfEscape(String input, String exceptionFragment) throws CsvReaderException {
+        final Charset charset = StandardCharsets.UTF_8;
+        final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).escape('|').build();
+        final InputStream stream = CsvTestUtil.toInputStream(input, charset);
+
+        Assertions.assertThatThrownBy(() -> {
+                    CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory());
+                }).hasMessageContaining(exceptionFragment);
+    }
+
+    private static Stream provideTuplesForErroneousUseOfEscapeTest() {
+        return Stream.of(
+                // Last character of input cannot be escape
+                Arguments.of("hello|", "The escape character cannot be the last character of the input"),
+                // Cannot escape carriage return
+                Arguments.of("hello|\r", "Can't escape a carriage return or newline"),
+                // Cannot escape newline
+                Arguments.of("hello|\n", "Can't escape a carriage return or newline"),
+                // Cannot escape non-ASCII
+                Arguments.of("hello|❤", "Can't escape a non-ASCII character")
+        );
+    }
 }

From 58d6ebd2cde9523f1e85f0282c03b171223d58f9 Mon Sep 17 00:00:00 2001
From: Corey Kosak 
Date: Thu, 15 Jan 2026 17:23:19 -0500
Subject: [PATCH 3/3] spotless

---
 src/test/java/io/deephaven/csv/EscapeTest.java | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/test/java/io/deephaven/csv/EscapeTest.java b/src/test/java/io/deephaven/csv/EscapeTest.java
index e4cf4fc2..c7a107bf 100644
--- a/src/test/java/io/deephaven/csv/EscapeTest.java
+++ b/src/test/java/io/deephaven/csv/EscapeTest.java
@@ -140,8 +140,8 @@ public void erroneousUseOfEscape(String input, String exceptionFragment) throws
         final InputStream stream = CsvTestUtil.toInputStream(input, charset);
 
         Assertions.assertThatThrownBy(() -> {
-                    CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory());
-                }).hasMessageContaining(exceptionFragment);
+            CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory());
+        }).hasMessageContaining(exceptionFragment);
     }
 
     private static Stream provideTuplesForErroneousUseOfEscapeTest() {
@@ -153,7 +153,6 @@ private static Stream provideTuplesForErroneousUseOfEscapeTest() {
                 // Cannot escape newline
                 Arguments.of("hello|\n", "Can't escape a carriage return or newline"),
                 // Cannot escape non-ASCII
-                Arguments.of("hello|❤", "Can't escape a non-ASCII character")
-        );
+                Arguments.of("hello|❤", "Can't escape a non-ASCII character"));
     }
 }