From 01fcea505ba5eb4d4bf76b427a14f169e5623bd0 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Mon, 3 Nov 2025 17:41:59 -0800 Subject: [PATCH 1/2] Add checks for surrogate pairing for UTF-8 generation --- .../jackson/core/json/UTF8JsonGenerator.java | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java index dbef52e3d4..268aabc4ca 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java +++ b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java @@ -1528,9 +1528,12 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features); if (combineSurrogates && offset < end) { char highSurrogate = (char) ch; - char lowSurrogate = cbuf[offset++]; - outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); - continue; + char lowSurrogate = cbuf[offset]; + if (_isEndOfSurrogatePair(lowSurrogate)) { + offset++; + outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); + continue; + } } } outputPtr = _outputMultiByteChar(ch, outputPtr); @@ -1576,9 +1579,12 @@ private final void _writeStringSegment2(final String text, int offset, final int final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features); if (combineSurrogates && offset < end) { char highSurrogate = (char) ch; - char lowSurrogate = text.charAt(offset++); - outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); - continue; + char lowSurrogate = text.charAt(offset); + if (_isEndOfSurrogatePair(lowSurrogate)) { + offset++; + outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); + continue; + } } } outputPtr = _outputMultiByteChar(ch, outputPtr); @@ -1752,9 +1758,12 @@ private final void _writeCustomStringSegment2(final char[] cbuf, int offset, fin final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features); if (combineSurrogates && offset < end) { char highSurrogate = (char) ch; - char lowSurrogate = cbuf[offset++]; - outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); - continue; + char lowSurrogate = cbuf[offset]; + if (_isEndOfSurrogatePair(lowSurrogate)) { + offset++; + outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); + continue; + } } } outputPtr = _outputMultiByteChar(ch, outputPtr); @@ -1819,9 +1828,12 @@ private final void _writeCustomStringSegment2(final String text, int offset, fin final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features); if (combineSurrogates && offset < end) { char highSurrogate = (char) ch; - char lowSurrogate = text.charAt(offset++); - outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); - continue; + char lowSurrogate = text.charAt(offset); + if (_isEndOfSurrogatePair(lowSurrogate)) { + offset++; + outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr); + continue; + } } } outputPtr = _outputMultiByteChar(ch, outputPtr); @@ -2291,5 +2303,11 @@ private static boolean _isStartOfSurrogatePair(final int ch) { // In 0xD800 - 0xDBFF range? return (ch & 0xFC00) == 0xD800; } + + // @since 2.21 + private static boolean _isEndOfSurrogatePair(final int ch) { + // In 0xDC00 - 0xDFFF range? + return (ch & 0xFC00) == 0xDC00; + } } From 0bf0d2e8d72f4c2c93310d84160308b9d3000152 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Mon, 3 Nov 2025 17:46:42 -0800 Subject: [PATCH 2/2] Add release notes --- release-notes/VERSION-2.x | 2 ++ 1 file changed, 2 insertions(+) diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index 69ade83d46..3b251e308c 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -26,6 +26,8 @@ a pure JSON library. #1470: Add method `copyCurrentStructureExact()` to `JsonGenerator` (contributed by Lars H) #1477: Add `JsonGenerator.has(StreamWriteCapability)` convenience method +#1500: Add checks for surrogate pairing for UTF-8 generation +(fix by @cowtowncoder, w/ Claude code) 2.20.1 (30-Oct-2025)