From 4eeef03ae362620cfea6f71b57f04f19029db952 Mon Sep 17 00:00:00 2001 From: Phillip Barta Date: Tue, 10 Feb 2026 22:19:44 +0100 Subject: [PATCH 1/4] perf: use decodeURIComponent for UTF-8 extended parameter decoding --- index.js | 129 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/index.js b/index.js index 08458b3..79e2758 100644 --- a/index.js +++ b/index.js @@ -21,14 +21,6 @@ module.exports.parse = parse var ENCODE_URL_ATTR_CHAR_REGEXP = /[\x00-\x20"'()*,/:;<=>?@[\\\]{}\x7f]/g // eslint-disable-line no-control-regex -/** - * RegExp to match percent encoding escape. - * @private - */ - -var HEX_ESCAPE_REGEXP = /%[0-9A-Fa-f]{2}/ -var HEX_ESCAPE_REPLACE_REGEXP = /%([0-9A-Fa-f]{2})/g - /** * RegExp to match non-latin1 characters. * @private @@ -192,7 +184,7 @@ function createparams (filename, fallback) { var hasFallback = typeof fallbackName === 'string' && fallbackName !== name // set extended filename parameter - if (hasFallback || !isQuotedString || HEX_ESCAPE_REGEXP.test(name)) { + if (hasFallback || !isQuotedString || hasHexEscape(name)) { params['filename*'] = name } @@ -255,32 +247,35 @@ function format (obj) { */ function decodefield (str) { - var match = EXT_VALUE_REGEXP.exec(str) + const match = EXT_VALUE_REGEXP.exec(str) if (!match) { throw new TypeError('invalid extended field value') } - var charset = match[1].toLowerCase() - var encoded = match[2] - var value - - // to binary string - var binary = encoded.replace(HEX_ESCAPE_REPLACE_REGEXP, pdecode) + const charset = match[1].toLowerCase() + const encoded = match[2] switch (charset) { case 'iso-8859-1': - value = getlatin1(binary) - break + { + const binary = decodeHexEscapes(encoded) + return getlatin1(binary) + } case 'utf-8': case 'utf8': - value = Buffer.from(binary, 'binary').toString('utf8') - break - default: - throw new TypeError('unsupported charset in extended field') + { + try { + return decodeURIComponent(encoded) + } 
catch { + // Failed to decode with decodeURIComponent, fallback to manual decoding which currently accepts any hex escapes and ignores invalid ones + // TODO: Consider removing in the next major version to be more strict about invalid percent-encodings + const binary = decodeHexEscapes(encoded) + return Buffer.from(binary, 'binary').toString('utf8') + } + } } - - return value + throw new TypeError('unsupported charset in extended field') } /** @@ -376,19 +371,6 @@ function parse (string) { return new ContentDisposition(type, params) } -/** - * Percent decode a single character. - * - * @param {string} str - * @param {string} hex - * @return {string} - * @private - */ - -function pdecode (str, hex) { - return String.fromCharCode(parseInt(hex, 16)) -} - /** * Percent encode a single character. * @@ -451,11 +433,11 @@ function ContentDisposition (type, parameters) { } /** - * Return the last portion of a path - * - * @param {string} path - * @returns {string} - */ + * Return the last portion of a path + * + * @param {string} path + * @returns {string} + */ function basename (path) { const normalized = path.replaceAll('\\', '/') @@ -475,3 +457,66 @@ function basename (path) { return normalized.slice(start + 1, end) } + +/** + * Check if a character is a hex digit [0-9A-Fa-f] + * + * @param {string} char + * @return {boolean} + * @private + */ +function isHexDigit (char) { + const code = char.charCodeAt(0) + return ( + (code >= 48 && code <= 57) || // 0-9 + (code >= 65 && code <= 70) || // A-F + (code >= 97 && code <= 102) // a-f + ) +} + +/** + * Check if a string contains percent encoding escapes. 
+ * + * @param {string} str + * @return {boolean} + * @private + */ +function hasHexEscape (str) { + const firstEscape = str.indexOf('%') + if (firstEscape === -1) return false + + for (let idx = firstEscape; idx < str.length - 2; idx++) { + if (str[idx] === '%' && isHexDigit(str[idx + 1]) && isHexDigit(str[idx + 2])) { + return true + } + } + return false +} + +/** + * Decode hex escapes in a string (e.g., %20 -> space) + * + * @param {string} str + * @return {string} + * @private + */ +function decodeHexEscapes (str) { + const firstEscape = str.indexOf('%') + if (firstEscape === -1) return str + + let result = str.slice(0, firstEscape) + for (let idx = firstEscape; idx < str.length; idx++) { + if ( + str[idx] === '%' && + idx + 2 < str.length && + isHexDigit(str[idx + 1]) && + isHexDigit(str[idx + 2]) + ) { + result += String.fromCharCode(Number.parseInt(str[idx + 1] + str[idx + 2], 16)) + idx += 2 + } else { + result += str[idx] + } + } + return result +} From ab482be7ed0ceaf9c16c6ebea56b6dca81051e18 Mon Sep 17 00:00:00 2001 From: Phillip Barta Date: Thu, 19 Feb 2026 21:38:57 +0100 Subject: [PATCH 2/4] feat: fallback to TextDecoder for Browser Support --- .eslintrc.yml | 2 ++ index.js | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.eslintrc.yml b/.eslintrc.yml index cf3015f..d464944 100644 --- a/.eslintrc.yml +++ b/.eslintrc.yml @@ -4,6 +4,8 @@ extends: - plugin:markdown/recommended plugins: - markdown +parserOptions: + ecmaVersion: 2021 overrides: - files: '**/*.md' processor: 'markdown/markdown' diff --git a/index.js b/index.js index 79e2758..457f77f 100644 --- a/index.js +++ b/index.js @@ -14,6 +14,13 @@ module.exports = contentDisposition module.exports.parse = parse +/** + * TextDecoder instance for UTF-8 decoding (when globalThis.Buffer is unavailable). + * @type {TextDecoder} + * @private + */ +let utf8Decoder + /** * RegExp to match non attr-char, *after* encodeURIComponent (i.e. 
not including "%") * @private @@ -268,10 +275,17 @@ function decodefield (str) { try { return decodeURIComponent(encoded) } catch { - // Failed to decode with decodeURIComponent, fallback to manual decoding which currently accepts any hex escapes and ignores invalid ones + // Failed to decode with decodeURIComponent, fallback to lenient decoding which replaces invalid UTF-8 byte sequences with the Unicode replacement character // TODO: Consider removing in the next major version to be more strict about invalid percent-encodings const binary = decodeHexEscapes(encoded) - return Buffer.from(binary, 'binary').toString('utf8') + + const bytes = new Uint8Array(binary.length) + for (let idx = 0; idx < binary.length; idx++) { + bytes[idx] = binary.charCodeAt(idx) + } + + utf8Decoder ??= new TextDecoder('utf-8') + return utf8Decoder.decode(bytes) } } } From e4a2f83f55766e3fc0b0c6e12ed613b11a65bcc0 Mon Sep 17 00:00:00 2001 From: Phillip Barta Date: Thu, 19 Feb 2026 22:03:40 +0100 Subject: [PATCH 3/4] fix: use better performing hasHexEscape method https://github.com/jshttp/content-disposition/pull/115#discussion_r2796189958 --- index.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/index.js b/index.js index 457f77f..235c110 100644 --- a/index.js +++ b/index.js @@ -496,14 +496,15 @@ function isHexDigit (char) { * @private */ function hasHexEscape (str) { - const firstEscape = str.indexOf('%') - if (firstEscape === -1) return false + const maxIndex = str.length - 3 + let lastIndex = -1 - for (let idx = firstEscape; idx < str.length - 2; idx++) { - if (str[idx] === '%' && isHexDigit(str[idx + 1]) && isHexDigit(str[idx + 2])) { + while ((lastIndex = str.indexOf('%', lastIndex + 1)) !== -1 && lastIndex <= maxIndex) { + if (isHexDigit(str[lastIndex + 1]) && isHexDigit(str[lastIndex + 2])) { return true } } + return false } From 774e80b6c2f888a7eced549625c6529b11b33cbd Mon Sep 17 00:00:00 2001 From: Phillip Barta Date: Thu, 19 Feb 2026 22:49:26 +0100 
Subject: [PATCH 4/4] fix: use global TextDecoder instance and fix comment --- index.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 235c110..d1e313a 100644 --- a/index.js +++ b/index.js @@ -15,11 +15,11 @@ module.exports = contentDisposition module.exports.parse = parse /** - * TextDecoder instance for UTF-8 decoding (when globalThis.Buffer is unavailable). + * TextDecoder instance for UTF-8 decoding when decodeURIComponent fails due to invalid byte sequences. * @type {TextDecoder} * @private */ -let utf8Decoder +const utf8Decoder = new TextDecoder('utf-8') /** * RegExp to match non attr-char, *after* encodeURIComponent (i.e. not including "%") @@ -284,7 +284,6 @@ function decodefield (str) { bytes[idx] = binary.charCodeAt(idx) } - utf8Decoder ??= new TextDecoder('utf-8') return utf8Decoder.decode(bytes) } }