From 6ed5a49b70ec728697c0d21c031f09a794165547 Mon Sep 17 00:00:00 2001 From: Marcel Hellkamp Date: Mon, 28 Oct 2024 10:53:56 +0100 Subject: [PATCH 1/2] fix: Reduce overhead while parsing junk Junk before the first or after the last boundary is permitted according to the specification, but should not trigger one log message per character. This patch removes the log lines, and also skips over all remaining bytes after the last boundary if possible. --- python_multipart/multipart.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index ace4a8f..5697df7 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1105,7 +1105,6 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No # Skip leading newlines if c == CR or c == LF: i += 1 - self.logger.debug("Skipping leading CR/LF at %d", i) continue # index is used as in index into our boundary. Set to 0. @@ -1398,9 +1397,9 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No i -= 1 elif state == MultipartState.END: - # Do nothing and just consume a byte in the end state. - if c not in (CR, LF): - self.logger.warning("Consuming a byte '0x%x' in the end state", c) # pragma: no cover + # Skip junk after the last boundary + i = length + break else: # pragma: no cover (error case) # We got into a strange state somehow! Just stop processing. From 89eb2ae3cbf3c633bcbd74ebdc21c725a9cff41e Mon Sep 17 00:00:00 2001 From: Marcel Hellkamp Date: Mon, 28 Oct 2024 10:57:21 +0100 Subject: [PATCH 2/2] change: Fail if there is too much junk Fail if there are 16 or more superfluous CR/LF characters in front of the first boundary, as this indicates a broken or malicious client. The spec technically allows it, but no browser or HTTP client should ever do that. 
--- python_multipart/multipart.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index 5697df7..1382f5c 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1102,6 +1102,14 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No c = data[i] if state == MultipartState.START: + # Stop parsing if there is no boundary within the first chunk + if i == 16: + msg = "Too much junk in front of first boundary (%d)" % (i,) + self.logger.warning(msg) + e = MultipartParseError(msg) + e.offset = i + raise e + # Skip leading newlines if c == CR or c == LF: i += 1