diff --git a/.eslintrc.yml b/.eslintrc.yml index cf3015f..d464944 100644 --- a/.eslintrc.yml +++ b/.eslintrc.yml @@ -4,6 +4,8 @@ extends: - plugin:markdown/recommended plugins: - markdown +parserOptions: + ecmaVersion: 2021 overrides: - files: '**/*.md' processor: 'markdown/markdown' diff --git a/index.js b/index.js index 08458b3..d1e313a 100644 --- a/index.js +++ b/index.js @@ -15,19 +15,18 @@ module.exports = contentDisposition module.exports.parse = parse /** - * RegExp to match non attr-char, *after* encodeURIComponent (i.e. not including "%") + * TextDecoder instance for UTF-8 decoding when decodeURIComponent fails due to invalid byte sequences. + * @type {TextDecoder} * @private */ - -var ENCODE_URL_ATTR_CHAR_REGEXP = /[\x00-\x20"'()*,/:;<=>?@[\\\]{}\x7f]/g // eslint-disable-line no-control-regex +const utf8Decoder = new TextDecoder('utf-8') /** - * RegExp to match percent encoding escape. + * RegExp to match non attr-char, *after* encodeURIComponent (i.e. not including "%") * @private */ -var HEX_ESCAPE_REGEXP = /%[0-9A-Fa-f]{2}/ -var HEX_ESCAPE_REPLACE_REGEXP = /%([0-9A-Fa-f]{2})/g +var ENCODE_URL_ATTR_CHAR_REGEXP = /[\x00-\x20"'()*,/:;<=>?@[\\\]{}\x7f]/g // eslint-disable-line no-control-regex /** * RegExp to match non-latin1 characters. @@ -192,7 +191,7 @@ function createparams (filename, fallback) { var hasFallback = typeof fallbackName === 'string' && fallbackName !== name // set extended filename parameter - if (hasFallback || !isQuotedString || HEX_ESCAPE_REGEXP.test(name)) { + if (hasFallback || !isQuotedString || hasHexEscape(name)) { params['filename*'] = name } @@ -255,32 +254,41 @@ function format (obj) { */ function decodefield (str) { - var match = EXT_VALUE_REGEXP.exec(str) + const match = EXT_VALUE_REGEXP.exec(str) if (!match) { throw new TypeError('invalid extended field value') } - var charset = match[1].toLowerCase() - var encoded = match[2] - var value - - // to binary string - var binary = encoded.replace(HEX_ESCAPE_REPLACE_REGEXP, pdecode) + const charset = match[1].toLowerCase() + const encoded = match[2] switch (charset) { case 'iso-8859-1': - value = getlatin1(binary) - break + { + const binary = decodeHexEscapes(encoded) + return getlatin1(binary) + } case 'utf-8': case 'utf8': - value = Buffer.from(binary, 'binary').toString('utf8') - break - default: - throw new TypeError('unsupported charset in extended field') + { + try { + return decodeURIComponent(encoded) + } catch { + // Failed to decode with decodeURIComponent, fallback to lenient decoding which replaces invalid UTF-8 byte sequences with the Unicode replacement character + // TODO: Consider removing in the next major version to be more strict about invalid percent-encodings + const binary = decodeHexEscapes(encoded) + + const bytes = new Uint8Array(binary.length) + for (let idx = 0; idx < binary.length; idx++) { + bytes[idx] = binary.charCodeAt(idx) + } + + return utf8Decoder.decode(bytes) + } + } } - - return value + throw new TypeError('unsupported charset in extended field') } /** @@ -376,19 +384,6 @@ function parse (string) { return new ContentDisposition(type, params) } -/** - * Percent decode a single character. - * - * @param {string} str - * @param {string} hex - * @return {string} - * @private - */ - -function pdecode (str, hex) { - return String.fromCharCode(parseInt(hex, 16)) -} - /** * Percent encode a single character. * @@ -451,11 +446,11 @@ function ContentDisposition (type, parameters) { } /** - * Return the last portion of a path - * - * @param {string} path - * @returns {string} - */ + * Return the last portion of a path + * + * @param {string} path + * @returns {string} + */ function basename (path) { const normalized = path.replaceAll('\\', '/') @@ -475,3 +470,67 @@ function basename (path) { return normalized.slice(start + 1, end) } + +/** + * Check if a character is a hex digit [0-9A-Fa-f] + * + * @param {string} char + * @return {boolean} + * @private + */ +function isHexDigit (char) { + const code = char.charCodeAt(0) + return ( + (code >= 48 && code <= 57) || // 0-9 + (code >= 65 && code <= 70) || // A-F + (code >= 97 && code <= 102) // a-f + ) +} + +/** + * Check if a string contains percent encoding escapes. + * + * @param {string} str + * @return {boolean} + * @private + */ +function hasHexEscape (str) { + const maxIndex = str.length - 3 + let lastIndex = -1 + + while ((lastIndex = str.indexOf('%', lastIndex + 1)) !== -1 && lastIndex <= maxIndex) { + if (isHexDigit(str[lastIndex + 1]) && isHexDigit(str[lastIndex + 2])) { + return true + } + } + + return false +} + +/** + * Decode hex escapes in a string (e.g., %20 -> space) + * + * @param {string} str + * @return {string} + * @private + */ +function decodeHexEscapes (str) { + const firstEscape = str.indexOf('%') + if (firstEscape === -1) return str + + let result = str.slice(0, firstEscape) + for (let idx = firstEscape; idx < str.length; idx++) { + if ( + str[idx] === '%' && + idx + 2 < str.length && + isHexDigit(str[idx + 1]) && + isHexDigit(str[idx + 2]) + ) { + result += String.fromCharCode(Number.parseInt(str[idx + 1] + str[idx + 2], 16)) + idx += 2 + } else { + result += str[idx] + } + } + return result +}