From bb78e30b55f705a197b0f30eda3620a34bee22d4 Mon Sep 17 00:00:00 2001 From: Jon Church Date: Wed, 5 Mar 2025 12:43:46 -0500 Subject: [PATCH 1/2] do not validate language in filename* --- index.js | 15 ++++++++------- test/test.js | 12 ++++++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/index.js b/index.js index efcd9ca..c0271fd 100644 --- a/index.js +++ b/index.js @@ -90,7 +90,7 @@ var TEXT_REGEXP = /^[\x20-\x7e\x80-\xff]+$/ var TOKEN_REGEXP = /^[!#$%&'*+.0-9A-Z^_`a-z|~-]+$/ /** - * RegExp for various RFC 5987 grammar + * RegExp for parsing extended parameter values per RFC 5987. * * ext-value = charset "'" [ language ] "'" value-chars * charset = "UTF-8" / "ISO-8859-1" / mime-charset @@ -99,10 +99,12 @@ var TOKEN_REGEXP = /^[!#$%&'*+.0-9A-Z^_`a-z|~-]+$/ * / "!" / "#" / "$" / "%" / "&" * / "+" / "-" / "^" / "_" / "`" * / "{" / "}" / "~" - * language = ( 2*3ALPHA [ extlang ] ) - * / 4ALPHA - * / 5*8ALPHA - * extlang = *3( "-" 3ALPHA ) + * + * language = <Language-Tag, defined in [RFC5646], Section 2.1> + * (Optional: the two literal single quotes MUST appear, + * but the language field inside them may be empty. 
+ * We ignoring the language content rather than validate it) + * * value-chars = *( pct-encoded / attr-char ) * pct-encoded = "%" HEXDIG HEXDIG * attr-char = ALPHA / DIGIT @@ -110,8 +112,7 @@ var TOKEN_REGEXP = /^[!#$%&'*+.0-9A-Z^_`a-z|~-]+$/ * / "^" / "_" / "`" / "|" / "~" * @private */ - -var EXT_VALUE_REGEXP = /^([A-Za-z0-9!#$%&+\-^_`{}~]+)'(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}|[A-Za-z]{4,8}|)'((?:%[0-9A-Fa-f]{2}|[A-Za-z0-9!#$&+.^_`|~-])+)$/ +var EXT_VALUE_REGEXP = /^([A-Za-z0-9!#$%&+\-^_`{}~]+)'(?:[^']*)'((?:%[0-9A-Fa-f]{2}|[A-Za-z0-9!#$&+.^_`|~-])+)$/ /** * RegExp for various RFC 6266 grammar diff --git a/test/test.js b/test/test.js index 18efc43..7d7a08b 100644 --- a/test/test.js +++ b/test/test.js @@ -380,6 +380,11 @@ describe('contentDisposition.parse(string)', function () { /unsupported charset/) }) + it('should reject when missing embedded language', function () { + assert.throws(contentDisposition.parse.bind(null, 'attachment; filename*=UTF-8%E2%82%AC%20rates.pdf'), + /invalid extended field value/) + }) + it('should parse with embedded language', function () { assert.deepEqual(contentDisposition.parse('attachment; filename*=UTF-8\'en\'%E2%82%AC%20rates.pdf'), { type: 'attachment', @@ -387,6 +392,13 @@ describe('contentDisposition.parse(string)', function () { }) }) + it('should parse with embedded language with region subtag', function () { + assert.deepEqual(contentDisposition.parse('attachment; filename*=UTF-8\'en-US\'%E2%82%AC%20rates.pdf'), { + type: 'attachment', + parameters: { filename: '€ rates.pdf' } + }) + }) + it('should prefer extended parameter value', function () { assert.deepEqual(contentDisposition.parse('attachment; filename="EURO rates.pdf"; filename*=UTF-8\'\'%E2%82%AC%20rates.pdf'), { type: 'attachment', From b99f7af9c730af534584c725bf880c3d199beef4 Mon Sep 17 00:00:00 2001 From: Jon Church Date: Wed, 5 Mar 2025 13:51:41 -0500 Subject: [PATCH 2/2] fix typo Co-authored-by: Wes Todd --- index.js | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/index.js b/index.js index c0271fd..0726837 100644 --- a/index.js +++ b/index.js @@ -103,7 +103,7 @@ var TOKEN_REGEXP = /^[!#$%&'*+.0-9A-Z^_`a-z|~-]+$/ * language = <Language-Tag, defined in [RFC5646], Section 2.1> * (Optional: the two literal single quotes MUST appear, * but the language field inside them may be empty. - * We ignoring the language content rather than validate it) + * We are ignoring the language content rather than validating it) * * value-chars = *( pct-encoded / attr-char ) * pct-encoded = "%" HEXDIG HEXDIG