Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -155,25 +155,35 @@ private void processQuotedMode(final ByteSlice dest, final MutableBoolean lastIn
++offset;
startOffset = offset;
}
// We got out of the quoted string. Consume any trailing matter after the quote and before the
// field delimiter. Hopefully that trailing matter is just whitespace, but we shall see.
finishField(dest, lastInRow, endOfInput);

// From this point on, note that dest is a slice that may point to the underlying input buffer
// or the spill buffer. Take care from this point on to not disturb the input (e.g. by reading
// the next chunk) or the spill buffer.

// The easiest way to make all the above logic run smoothly is to let the final quotation mark
// (which will unconditionally be there) and subsequent whitespace (if any) into the field.
// Then we can simply trim it back out now.
// (which will unconditionally be there) and subsequent matter (if any) into the field.
// Then we can simply trim it back out, making sure that what we are trimming is only whitespace.
// After trimming, we will see if the expected number of chars matches the actual number of chars.
// The -1 here is because the number of characters processed includes the closing quote already.
final int expectedSize = spillBuffer.size() + offset - startOffset - 1;
finishField(dest, lastInRow, endOfInput);

// Trim away any trailing whitespace
while (dest.begin() != dest.end() && RangeTests.isSpaceOrTab(dest.back())) {
dest.setEnd(dest.end() - 1);
}

final String exceptionMessage = "Logic error: final non-whitespace in field is not quoteChar";

// Trim away the final quote char
if (dest.begin() == dest.end() || dest.back() != quoteChar) {
throw new RuntimeException("Logic error: final non-whitespace in field is not quoteChar");
throw new RuntimeException(exceptionMessage);
}
dest.setEnd(dest.end() - 1);

// Ensure we have the expected number of chars. The above logic can get misled if there are multiple
// closing quotes, as in the input "hello there"junk".
// The quote at the end of 'there' is the real closing quote; the remainder of the text is trash and should
// be rejected.
if (dest.size() != expectedSize) {
throw new RuntimeException(exceptionMessage);
}
}

/**
Expand Down
31 changes: 22 additions & 9 deletions src/test/java/io/deephaven/csv/CsvReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
Expand Down Expand Up @@ -1064,9 +1065,13 @@ public void quotingSuccessfulEdgeCases() throws CsvReaderException {
// #
+ "####\n"
// ##
+ "######\n";
+ "######\n"
// hello
+ "#hello#\n"
// hello
+ "#hello#\t\t \t\t\n";

final ColumnSet expected = ColumnSet.of(Column.ofRefs("Values", null, "#", "##"));
final ColumnSet expected = ColumnSet.of(Column.ofRefs("Values", null, "#", "##", "hello", "hello"));

CsvTestUtil.invokeTests(CsvTestUtil.defaultCsvBuilder().quote('#').build(), input, expected);
}
Expand All @@ -1081,14 +1086,22 @@ public void quotingFailingEdgeCases() {
.hasRootCauseMessage("Cell did not have closing quote character");
}

@Test
public void quotingExcessMaterial() {
final String input = "" + "Val1,Val2\n" + "#hello#junk,there\n"; // invalid

@ParameterizedTest
@ValueSource(strings = {
// trailing matter after closing quote (here the quote is the hashmark)
"#hello#junk,there\n",
// trailing matter after the closing quote, followed by another quote at the end; this is still an error
"#hello#junk#,there\n"
})
public void quotingExcessNonwhitespaceMaterial(String input) {
Assertions.assertThatThrownBy(
() -> CsvTestUtil.invokeTests(CsvTestUtil.defaultCsvBuilder().quote('#').build(), input,
ColumnSet.NONE))
.hasRootCauseMessage("Logic error: final non-whitespace in field is not quoteChar");
() -> {
final CsvSpecs specs = CsvTestUtil.defaultCsvBuilder().hasHeaderRow(false).quote('#').build();
final Charset charset = StandardCharsets.UTF_8;
final InputStream stream = CsvTestUtil.toInputStream(input, charset);
CsvReader.read(specs, stream, charset, CsvTestUtil.makeMySinkFactory());
})
.hasMessage("Logic error: final non-whitespace in field is not quoteChar");
}

@Test
Expand Down