@@ -262,6 +262,14 @@ static String toHex16Bit(unsigned int x) {
262262 return result;
263263}
264264
265+ static void appendRaw (String& result, unsigned ch) {
266+ result += static_cast <char >(ch);
267+ }
268+
269+ static void appendHex (String& result, unsigned ch) {
270+ result.append (" \\ u" ).append (toHex16Bit (ch));
271+ }
272+
265273static String valueToQuotedStringN (const char * value, unsigned length,
266274 bool emitUTF8 = false ) {
267275 if (value == nullptr )
@@ -310,29 +318,26 @@ static String valueToQuotedStringN(const char* value, unsigned length,
310318 // sequence from occurring.
311319 default : {
312320 if (emitUTF8) {
313- result += *c;
321+ unsigned codepoint = static_cast <unsigned char >(*c);
322+ if (codepoint < 0x20 ) {
323+ appendHex (result, codepoint);
324+ } else {
325+ appendRaw (result, codepoint);
326+ }
314327 } else {
315- unsigned int codepoint = utf8ToCodepoint (c, end);
316- const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20 ;
317- const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F ;
318- const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000 ;
319- // don't escape non-control characters
320- // (short escape sequence are applied above)
321- if (FIRST_NON_CONTROL_CODEPOINT <= codepoint &&
322- codepoint <= LAST_NON_CONTROL_CODEPOINT) {
323- result += static_cast <char >(codepoint);
324- } else if (codepoint <
325- FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic
326- // Multilingual Plane
327- result += " \\ u" ;
328- result += toHex16Bit (codepoint);
329- } else { // codepoint is not in Basic Multilingual Plane
330- // convert to surrogate pair first
331- codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT;
332- result += " \\ u" ;
333- result += toHex16Bit ((codepoint >> 10 ) + 0xD800 );
334- result += " \\ u" ;
335- result += toHex16Bit ((codepoint & 0x3FF ) + 0xDC00 );
328+ unsigned codepoint = utf8ToCodepoint (c, end); // modifies `c`
329+ if (codepoint < 0x20 ) {
330+ appendHex (result, codepoint);
331+ } else if (codepoint < 0x80 ) {
332+ appendRaw (result, codepoint);
333+ } else if (codepoint < 0x10000 ) {
334+ // Basic Multilingual Plane
335+ appendHex (result, codepoint);
336+ } else {
337+ // Extended Unicode. Encode 20 bits as a surrogate pair.
338+ codepoint -= 0x10000 ;
339+ appendHex (result, 0xd800 + ((codepoint >> 10 ) & 0x3ff ));
340+ appendHex (result, 0xdc00 + (codepoint & 0x3ff ));
336341 }
337342 }
338343 } break ;
0 commit comments