From 4adb772c764146039d9567758a242424159dcef2 Mon Sep 17 00:00:00 2001 From: Gilles Dartiguelongue Date: Fri, 11 Jul 2025 13:43:09 +0200 Subject: [PATCH 1/3] Add sample IndentationError failure The test will fail for now, showing the shortcomings of trying to tokenize the whole file at once. Signed-off-by: Gilles Dartiguelongue --- tests/check_license_headers_test.py | 1 + tests/resources/tokenize_indentation_error.sh | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/resources/tokenize_indentation_error.sh diff --git a/tests/check_license_headers_test.py b/tests/check_license_headers_test.py index b44517e..d1c9f00 100644 --- a/tests/check_license_headers_test.py +++ b/tests/check_license_headers_test.py @@ -30,6 +30,7 @@ def get_abspath_str(filename: str) -> str: ("invalid_owner.txt", 1), ("missing_header.py", 1), ("tokenize_fail.yaml", 2), + ("tokenize_indentation_error.sh", 2), ), ) def test_check_license_headers(filename, expected_retval): diff --git a/tests/resources/tokenize_indentation_error.sh b/tests/resources/tokenize_indentation_error.sh new file mode 100644 index 0000000..1f50b60 --- /dev/null +++ b/tests/resources/tokenize_indentation_error.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# Copyright (c) 2014-2016, 2018, 2020-2021 AFakeCompany Ltd +# +# Use of this source code is governed by a BSD-3-clause license that can +# be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause + +show_help() { + cat <&2 + exit 1 + ;; + esac +done From af0b9294c62bfbeab506377bb9c9e27c50835ef6 Mon Sep 17 00:00:00 2001 From: Gilles Dartiguelongue Date: Fri, 11 Jul 2025 13:50:46 +0200 Subject: [PATCH 2/3] Yield tokens as lines are yielded Leave a chance to detect the headers even if the content of the file does not validate against the Python tokenizer. 
Signed-off-by: Gilles Dartiguelongue --- pre_commit_license_headers/check_license_headers.py | 4 +--- tests/check_license_headers_test.py | 4 ++-- ...e_indentation_error.sh => content_tokenization_skipped.sh} | 0 .../{tokenize_fail.yaml => content_tokenization_skipped.yaml} | 0 4 files changed, 3 insertions(+), 5 deletions(-) rename tests/resources/{tokenize_indentation_error.sh => content_tokenization_skipped.sh} (100%) rename tests/resources/{tokenize_fail.yaml => content_tokenization_skipped.yaml} (100%) diff --git a/pre_commit_license_headers/check_license_headers.py b/pre_commit_license_headers/check_license_headers.py index 29669d3..203b160 100644 --- a/pre_commit_license_headers/check_license_headers.py +++ b/pre_commit_license_headers/check_license_headers.py @@ -36,9 +36,7 @@ def check_license_headers(filepath: Path, header_pattern: str, debug: bool) -> b content_lines = [] with filepath.open() as f: - tokens_generator = list(tokenize.generate_tokens(f.readline)) - - for token in tokens_generator: + for token in tokenize.generate_tokens(f.readline): if token.type not in HEADER_TOKENS: # we've reached the end of the header break diff --git a/tests/check_license_headers_test.py b/tests/check_license_headers_test.py index d1c9f00..d5bf2c3 100644 --- a/tests/check_license_headers_test.py +++ b/tests/check_license_headers_test.py @@ -29,8 +29,8 @@ def get_abspath_str(filename: str) -> str: ("valid_2.py", 0), ("invalid_owner.txt", 1), ("missing_header.py", 1), - ("tokenize_fail.yaml", 2), - ("tokenize_indentation_error.sh", 2), + ("content_tokenization_skipped.yaml", 0), + ("content_tokenization_skipped.sh", 0), ), ) def test_check_license_headers(filename, expected_retval): diff --git a/tests/resources/tokenize_indentation_error.sh b/tests/resources/content_tokenization_skipped.sh similarity index 100% rename from tests/resources/tokenize_indentation_error.sh rename to tests/resources/content_tokenization_skipped.sh diff --git 
a/tests/resources/tokenize_fail.yaml b/tests/resources/content_tokenization_skipped.yaml similarity index 100% rename from tests/resources/tokenize_fail.yaml rename to tests/resources/content_tokenization_skipped.yaml From 0e93b4ceac980b466d93d2eeef9650695720d150 Mon Sep 17 00:00:00 2001 From: Gilles Dartiguelongue Date: Fri, 11 Jul 2025 13:57:31 +0200 Subject: [PATCH 3/3] Test tokenize exception as still caught Signed-off-by: Gilles Dartiguelongue --- tests/check_license_headers_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/check_license_headers_test.py b/tests/check_license_headers_test.py index d5bf2c3..97c2ba2 100644 --- a/tests/check_license_headers_test.py +++ b/tests/check_license_headers_test.py @@ -3,6 +3,8 @@ # be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause from pathlib import Path +from tokenize import TokenError +from unittest import mock import pytest @@ -136,3 +138,15 @@ def test_ignored_owner(capsys): assert e.value.code == 1 stdout, _ = capsys.readouterr() assert "'--owner' will be ignored" in stdout + + +def test_tokenize_exception(): + """check_license_headers raises TokenError marks file as skipped.""" + with pytest.raises(SystemExit) as e: + with mock.patch( + "pre_commit_license_headers.check_license_headers.check_license_headers" + ) as m: + m.side_effect = [TokenError] + main(base_args + [get_abspath_str("valid_1.py")]) + assert e.type == SystemExit + assert e.value.code == 2