From 5018cf71cdfcef89a058233b1eb1002eb93dde57 Mon Sep 17 00:00:00 2001 From: Gunnar Kreitz Date: Thu, 13 Nov 2025 15:55:48 +0100 Subject: [PATCH 1/2] Add test coverage of default_validator --- tests/default_validator_tests/README.md | 38 +++++++ .../run_and_generate_expected.py | 104 ++++++++++++++++++ .../test_case_sensitive_ac/args.txt | 1 + .../expected_exit_code.txt | 1 + .../test_case_sensitive_ac/judge.ans | 1 + .../test_case_sensitive_ac/user.out | 1 + .../test_case_sensitive_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 4 + .../test_case_sensitive_wa/judge.ans | 1 + .../test_case_sensitive_wa/user.out | 1 + .../test_combined_flags_ac/args.txt | 1 + .../expected_exit_code.txt | 1 + .../test_combined_flags_ac/judge.ans | 1 + .../test_combined_flags_ac/user.out | 1 + .../test_combined_flags_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 6 + .../test_combined_flags_wa/judge.ans | 1 + .../test_combined_flags_wa/user.out | 1 + .../test_float_abs_tol_ac/args.txt | 1 + .../expected_exit_code.txt | 1 + .../test_float_abs_tol_ac/judge.ans | 1 + .../test_float_abs_tol_ac/user.out | 1 + .../test_float_abs_tol_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 6 + .../test_float_abs_tol_wa/judge.ans | 1 + .../test_float_abs_tol_wa/user.out | 1 + .../test_float_inf_judge_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 4 + .../test_float_inf_judge_wa/judge.ans | 1 + .../test_float_inf_judge_wa/user.out | 1 + .../test_float_rel_tol_ac/args.txt | 1 + .../expected_exit_code.txt | 1 + .../test_float_rel_tol_ac/judge.ans | 1 + .../test_float_rel_tol_ac/user.out | 1 + .../test_float_rel_tol_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 6 + .../test_float_rel_tol_wa/judge.ans | 1 + .../test_float_rel_tol_wa/user.out | 1 + .../test_float_tolerance_ac/args.txt | 1 + .../expected_exit_code.txt | 1 + 
.../test_float_tolerance_ac/judge.ans | 1 + .../test_float_tolerance_ac/user.out | 1 + .../test_float_tolerance_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 6 + .../test_float_tolerance_wa/judge.ans | 1 + .../test_float_tolerance_wa/user.out | 1 + .../test_simple_ac/args.txt | 0 .../test_simple_ac/expected_exit_code.txt | 1 + .../test_simple_ac/judge.ans | 1 + .../test_simple_ac/user.out | 1 + .../test_simple_wa/args.txt | 0 .../test_simple_wa/expected_exit_code.txt | 1 + .../test_simple_wa/expected_message.txt | 4 + .../test_simple_wa/judge.ans | 1 + .../test_simple_wa/user.out | 1 + .../test_space_sensitive_ac/args.txt | 1 + .../expected_exit_code.txt | 1 + .../test_space_sensitive_ac/judge.ans | 2 + .../test_space_sensitive_ac/user.out | 2 + .../test_space_sensitive_wa/args.txt | 1 + .../expected_exit_code.txt | 1 + .../expected_message.txt | 2 + .../test_space_sensitive_wa/judge.ans | 2 + .../test_space_sensitive_wa/user.out | 2 + tests/test_default_validator.py | 92 ++++++++++++++++ 71 files changed, 334 insertions(+) create mode 100644 tests/default_validator_tests/README.md create mode 100755 tests/default_validator_tests/run_and_generate_expected.py create mode 100644 tests/default_validator_tests/test_case_sensitive_ac/args.txt create mode 100644 tests/default_validator_tests/test_case_sensitive_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_case_sensitive_ac/judge.ans create mode 100644 tests/default_validator_tests/test_case_sensitive_ac/user.out create mode 100644 tests/default_validator_tests/test_case_sensitive_wa/args.txt create mode 100644 tests/default_validator_tests/test_case_sensitive_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_case_sensitive_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_case_sensitive_wa/judge.ans create mode 100644 tests/default_validator_tests/test_case_sensitive_wa/user.out create mode 100644 
tests/default_validator_tests/test_combined_flags_ac/args.txt create mode 100644 tests/default_validator_tests/test_combined_flags_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_combined_flags_ac/judge.ans create mode 100644 tests/default_validator_tests/test_combined_flags_ac/user.out create mode 100644 tests/default_validator_tests/test_combined_flags_wa/args.txt create mode 100644 tests/default_validator_tests/test_combined_flags_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_combined_flags_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_combined_flags_wa/judge.ans create mode 100644 tests/default_validator_tests/test_combined_flags_wa/user.out create mode 100644 tests/default_validator_tests/test_float_abs_tol_ac/args.txt create mode 100644 tests/default_validator_tests/test_float_abs_tol_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_abs_tol_ac/judge.ans create mode 100644 tests/default_validator_tests/test_float_abs_tol_ac/user.out create mode 100644 tests/default_validator_tests/test_float_abs_tol_wa/args.txt create mode 100644 tests/default_validator_tests/test_float_abs_tol_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_abs_tol_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_float_abs_tol_wa/judge.ans create mode 100644 tests/default_validator_tests/test_float_abs_tol_wa/user.out create mode 100644 tests/default_validator_tests/test_float_inf_judge_wa/args.txt create mode 100644 tests/default_validator_tests/test_float_inf_judge_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_inf_judge_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_float_inf_judge_wa/judge.ans create mode 100644 tests/default_validator_tests/test_float_inf_judge_wa/user.out create mode 100644 
tests/default_validator_tests/test_float_rel_tol_ac/args.txt create mode 100644 tests/default_validator_tests/test_float_rel_tol_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_rel_tol_ac/judge.ans create mode 100644 tests/default_validator_tests/test_float_rel_tol_ac/user.out create mode 100644 tests/default_validator_tests/test_float_rel_tol_wa/args.txt create mode 100644 tests/default_validator_tests/test_float_rel_tol_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_rel_tol_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_float_rel_tol_wa/judge.ans create mode 100644 tests/default_validator_tests/test_float_rel_tol_wa/user.out create mode 100644 tests/default_validator_tests/test_float_tolerance_ac/args.txt create mode 100644 tests/default_validator_tests/test_float_tolerance_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_tolerance_ac/judge.ans create mode 100644 tests/default_validator_tests/test_float_tolerance_ac/user.out create mode 100644 tests/default_validator_tests/test_float_tolerance_wa/args.txt create mode 100644 tests/default_validator_tests/test_float_tolerance_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_float_tolerance_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_float_tolerance_wa/judge.ans create mode 100644 tests/default_validator_tests/test_float_tolerance_wa/user.out create mode 100644 tests/default_validator_tests/test_simple_ac/args.txt create mode 100644 tests/default_validator_tests/test_simple_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_simple_ac/judge.ans create mode 100644 tests/default_validator_tests/test_simple_ac/user.out create mode 100644 tests/default_validator_tests/test_simple_wa/args.txt create mode 100644 tests/default_validator_tests/test_simple_wa/expected_exit_code.txt create 
mode 100644 tests/default_validator_tests/test_simple_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_simple_wa/judge.ans create mode 100644 tests/default_validator_tests/test_simple_wa/user.out create mode 100644 tests/default_validator_tests/test_space_sensitive_ac/args.txt create mode 100644 tests/default_validator_tests/test_space_sensitive_ac/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_space_sensitive_ac/judge.ans create mode 100644 tests/default_validator_tests/test_space_sensitive_ac/user.out create mode 100644 tests/default_validator_tests/test_space_sensitive_wa/args.txt create mode 100644 tests/default_validator_tests/test_space_sensitive_wa/expected_exit_code.txt create mode 100644 tests/default_validator_tests/test_space_sensitive_wa/expected_message.txt create mode 100644 tests/default_validator_tests/test_space_sensitive_wa/judge.ans create mode 100644 tests/default_validator_tests/test_space_sensitive_wa/user.out create mode 100644 tests/test_default_validator.py diff --git a/tests/default_validator_tests/README.md b/tests/default_validator_tests/README.md new file mode 100644 index 00000000..5727e928 --- /dev/null +++ b/tests/default_validator_tests/README.md @@ -0,0 +1,38 @@ +# Default Validator Tests + +This directory contains test cases for the `default_validator`. Each test case resides in its own subdirectory, named `test_<description>` (e.g., `test_simple_ac` for a simple test that should be Accepted, or `test_float_wa` for a float comparison that should result in Wrong Answer). + +## Structure of a Test Case + +Each test case directory should contain the following files: + +* `judge.ans`: The reference answer file. This is what the `default_validator` will compare against. +* `user.out`: The user's output file that the `default_validator` will evaluate. +* `args.txt`: (Optional) A plain text file containing command-line arguments to be passed to the `default_validator`. 
Each argument should be space-separated. For example: `case_sensitive float_absolute_tolerance 0.001`. +* `expected_exit_code.txt`: Contains the expected exit code of the `default_validator` for this test case (e.g., `42` for Accepted, `43` for Wrong Answer). +* `expected_message.txt`: (Optional) Contains the exact error message expected from the `default_validator` if the test case results in a Wrong Answer. This file should only exist if a message is expected. + +## Adding a New Test Case + +To add a new test case: + +1. Create a new directory within `tests/default_validator_tests/` (e.g., `tests/default_validator_tests/test_my_new_feature_ac`). +2. Inside this new directory, create `judge.ans` and `user.out` files with the desired content for your test. +3. If your test requires specific command-line arguments for the `default_validator` (e.g., `case_sensitive`, `float_tolerance`), create an `args.txt` file in the directory with these arguments. +4. Use the `run_and_generate_expected.py` script to automatically generate `expected_exit_code.txt` and optionally `expected_message.txt`: + ```bash + cd tests/default_validator_tests/ + ./run_and_generate_expected.py test_my_new_feature_ac + ``` + This script will run the `default_validator` with your provided inputs and arguments, capture its exit code and any feedback message, and write them to the respective `expected_*.txt` files. + +## Updating an Existing Test Case + +If you modify `judge.ans`, `user.out`, or `args.txt` for an existing test case, you need to update its expected output: + +```bash +cd tests/default_validator_tests/ +./run_and_generate_expected.py test_case_to_update +``` + +This will regenerate the `expected_exit_code.txt` and `expected_message.txt` files based on your changes. Check with `git diff` and commit if the changes are what you expected. 
diff --git a/tests/default_validator_tests/run_and_generate_expected.py b/tests/default_validator_tests/run_and_generate_expected.py
new file mode 100755
index 00000000..031badc4
--- /dev/null
+++ b/tests/default_validator_tests/run_and_generate_expected.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+#
+# A script to run the default_validator on a test case and generate the expected output files.
+#
+# Usage: ./run_and_generate_expected.py <test_dir>
+#
+# The test directory must contain:
+# - judge.ans: The correct output.
+# - user.out: The output to validate.
+# - args.txt: (Optional) arguments to pass to the validator.
+#
+# The script will create:
+# - expected_exit_code.txt: The exit code of the validator.
+# - expected_message.txt: The message from the validator, if any.
+#
+
+import argparse
+import subprocess
+import tempfile
+from pathlib import Path
+import sys
+
+
+def main():
+    """Run default_validator on one test directory and record what it produced.
+
+    Reads judge.ans, user.out and (optionally) args.txt from the directory given
+    on the command line, builds the validator with make if its executable is
+    missing, feeds user.out to the validator on stdin, and then writes
+    expected_exit_code.txt and expected_message.txt next to the inputs.
+    Exits with status 1 if the inputs or the validator executable are unavailable.
+    """
+    parser = argparse.ArgumentParser(description='Run default_validator and generate expected output files.')
+    parser.add_argument('test_dir', type=Path, help='Path to the test directory.')
+    args = parser.parse_args()
+
+    test_dir: Path = args.test_dir
+
+    if not test_dir.is_dir():
+        print(f'Error: Test directory not found at {test_dir}', file=sys.stderr)
+        sys.exit(1)
+
+    validator_path = Path(__file__).parent.parent.parent / 'support' / 'default_validator' / 'default_validator'
+    if not validator_path.is_file():
+        print('Compiling default_validator...', file=sys.stderr)
+        try:
+            subprocess.run(
+                ['make', 'default_validator'],
+                cwd=validator_path.parent,
+                check=True,
+                capture_output=True,
+                text=True,
+                encoding='utf-8',
+            )
+        except subprocess.CalledProcessError as e:
+            print(f'Failed to compile default_validator: {e.stderr}', file=sys.stderr)
+            sys.exit(1)
+        except Exception as e:
+            print(f'An unexpected error occurred during compilation: {e}', file=sys.stderr)
+            sys.exit(1)
+
+    if not validator_path.is_file():
+        print(f'Validator executable not found at {validator_path} after compilation.', file=sys.stderr)
+        sys.exit(1)
+
+    judge_ans = test_dir / 'judge.ans'
+    user_out = test_dir / 'user.out'
+    args_file = test_dir / 'args.txt'
+
+    if not judge_ans.is_file():
+        print(f'Error: judge.ans not found in {test_dir}', file=sys.stderr)
+        sys.exit(1)
+    if not user_out.is_file():
+        print(f'Error: user.out not found in {test_dir}', file=sys.stderr)
+        sys.exit(1)
+
+    # args.txt is optional; a missing or blank file means "no extra arguments".
+    validator_args = []
+    if args_file.is_file():
+        validator_args = args_file.read_text(encoding='utf-8').split()
+
+    with tempfile.TemporaryDirectory() as feedback_dir:
+        # The validator expects judge_in, judge_ans, feedback_dir
+        # judge_in is not used by the validator for comparison, so we can pass a dummy file.
+        with tempfile.NamedTemporaryFile() as dummy_judge_in, open(user_out, 'r', encoding='utf-8') as user_out_f:
+            cmd = [str(validator_path), str(dummy_judge_in.name), str(judge_ans), feedback_dir, *validator_args]
+
+            result = subprocess.run(cmd, stdin=user_out_f, capture_output=True, text=True, encoding='utf-8')
+
+        # Write expected_exit_code.txt
+        (test_dir / 'expected_exit_code.txt').write_text(str(result.returncode) + '\n', encoding='utf-8')
+        print(f'Wrote exit code {result.returncode} to expected_exit_code.txt')
+
+        # Read the message while feedback_dir still exists. A missing and an empty
+        # judgemessage.txt both mean "no message", so they are handled the same way.
+        judgemessage_path = Path(feedback_dir) / 'judgemessage.txt'
+        message = judgemessage_path.read_text(encoding='utf-8') if judgemessage_path.is_file() else ''
+
+        expected_message_file = test_dir / 'expected_message.txt'
+        if message:
+            expected_message_file.write_text(message, encoding='utf-8')
+            print('Wrote message to expected_message.txt')
+        elif expected_message_file.is_file():
+            # No message this run: remove a stale file left over from an earlier run.
+            expected_message_file.unlink()
+            print('Removed existing expected_message.txt as no message was generated.')
+
+
+if __name__ == '__main__':
+    main()
diff --git
a/tests/default_validator_tests/test_case_sensitive_ac/args.txt b/tests/default_validator_tests/test_case_sensitive_ac/args.txt new file mode 100644 index 00000000..d342c077 --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_ac/args.txt @@ -0,0 +1 @@ +case_sensitive diff --git a/tests/default_validator_tests/test_case_sensitive_ac/expected_exit_code.txt b/tests/default_validator_tests/test_case_sensitive_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git a/tests/default_validator_tests/test_case_sensitive_ac/judge.ans b/tests/default_validator_tests/test_case_sensitive_ac/judge.ans new file mode 100644 index 00000000..e965047a --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_ac/judge.ans @@ -0,0 +1 @@ +Hello diff --git a/tests/default_validator_tests/test_case_sensitive_ac/user.out b/tests/default_validator_tests/test_case_sensitive_ac/user.out new file mode 100644 index 00000000..e965047a --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_ac/user.out @@ -0,0 +1 @@ +Hello diff --git a/tests/default_validator_tests/test_case_sensitive_wa/args.txt b/tests/default_validator_tests/test_case_sensitive_wa/args.txt new file mode 100644 index 00000000..d342c077 --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_wa/args.txt @@ -0,0 +1 @@ +case_sensitive diff --git a/tests/default_validator_tests/test_case_sensitive_wa/expected_exit_code.txt b/tests/default_validator_tests/test_case_sensitive_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_case_sensitive_wa/expected_message.txt b/tests/default_validator_tests/test_case_sensitive_wa/expected_message.txt new file mode 100644 
index 00000000..1f8367d8 --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_wa/expected_message.txt @@ -0,0 +1,4 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +String tokens mismatch +Judge: "Hello" +User: "hello" diff --git a/tests/default_validator_tests/test_case_sensitive_wa/judge.ans b/tests/default_validator_tests/test_case_sensitive_wa/judge.ans new file mode 100644 index 00000000..e965047a --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_wa/judge.ans @@ -0,0 +1 @@ +Hello diff --git a/tests/default_validator_tests/test_case_sensitive_wa/user.out b/tests/default_validator_tests/test_case_sensitive_wa/user.out new file mode 100644 index 00000000..ce013625 --- /dev/null +++ b/tests/default_validator_tests/test_case_sensitive_wa/user.out @@ -0,0 +1 @@ +hello diff --git a/tests/default_validator_tests/test_combined_flags_ac/args.txt b/tests/default_validator_tests/test_combined_flags_ac/args.txt new file mode 100644 index 00000000..d14be12a --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_ac/args.txt @@ -0,0 +1 @@ +case_sensitive space_change_sensitive float_tolerance 0.5 diff --git a/tests/default_validator_tests/test_combined_flags_ac/expected_exit_code.txt b/tests/default_validator_tests/test_combined_flags_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git a/tests/default_validator_tests/test_combined_flags_ac/judge.ans b/tests/default_validator_tests/test_combined_flags_ac/judge.ans new file mode 100644 index 00000000..1e085622 --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_ac/judge.ans @@ -0,0 +1 @@ +PI is 3.14 diff --git a/tests/default_validator_tests/test_combined_flags_ac/user.out b/tests/default_validator_tests/test_combined_flags_ac/user.out new file mode 100644 index 00000000..1e085622 --- 
/dev/null +++ b/tests/default_validator_tests/test_combined_flags_ac/user.out @@ -0,0 +1 @@ +PI is 3.14 diff --git a/tests/default_validator_tests/test_combined_flags_wa/args.txt b/tests/default_validator_tests/test_combined_flags_wa/args.txt new file mode 100644 index 00000000..d14be12a --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_wa/args.txt @@ -0,0 +1 @@ +case_sensitive space_change_sensitive float_tolerance 0.5 diff --git a/tests/default_validator_tests/test_combined_flags_wa/expected_exit_code.txt b/tests/default_validator_tests/test_combined_flags_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_combined_flags_wa/expected_message.txt b/tests/default_validator_tests/test_combined_flags_wa/expected_message.txt new file mode 100644 index 00000000..645020d1 --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_wa/expected_message.txt @@ -0,0 +1,6 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +Too large difference. 
+ Judge: 1.0 + User: 2.0 + Difference: -1.000000e+00 + (abs tol 5.000000e-01 rel tol 5.000000e-01) diff --git a/tests/default_validator_tests/test_combined_flags_wa/judge.ans b/tests/default_validator_tests/test_combined_flags_wa/judge.ans new file mode 100644 index 00000000..e59b8af9 --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_wa/judge.ans @@ -0,0 +1 @@ +PI is 1.0 \ No newline at end of file diff --git a/tests/default_validator_tests/test_combined_flags_wa/user.out b/tests/default_validator_tests/test_combined_flags_wa/user.out new file mode 100644 index 00000000..85332ebd --- /dev/null +++ b/tests/default_validator_tests/test_combined_flags_wa/user.out @@ -0,0 +1 @@ +PI is 2.0 \ No newline at end of file diff --git a/tests/default_validator_tests/test_float_abs_tol_ac/args.txt b/tests/default_validator_tests/test_float_abs_tol_ac/args.txt new file mode 100644 index 00000000..9f45cc54 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_ac/args.txt @@ -0,0 +1 @@ +float_absolute_tolerance 0.5 diff --git a/tests/default_validator_tests/test_float_abs_tol_ac/expected_exit_code.txt b/tests/default_validator_tests/test_float_abs_tol_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git a/tests/default_validator_tests/test_float_abs_tol_ac/judge.ans b/tests/default_validator_tests/test_float_abs_tol_ac/judge.ans new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_ac/judge.ans @@ -0,0 +1 @@ +1 diff --git a/tests/default_validator_tests/test_float_abs_tol_ac/user.out b/tests/default_validator_tests/test_float_abs_tol_ac/user.out new file mode 100644 index 00000000..c068b244 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_ac/user.out @@ -0,0 +1 @@ +1.4 diff --git 
a/tests/default_validator_tests/test_float_abs_tol_wa/args.txt b/tests/default_validator_tests/test_float_abs_tol_wa/args.txt new file mode 100644 index 00000000..78db7f40 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_wa/args.txt @@ -0,0 +1 @@ +float_absolute_tolerance 0.1 diff --git a/tests/default_validator_tests/test_float_abs_tol_wa/expected_exit_code.txt b/tests/default_validator_tests/test_float_abs_tol_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_float_abs_tol_wa/expected_message.txt b/tests/default_validator_tests/test_float_abs_tol_wa/expected_message.txt new file mode 100644 index 00000000..e3fb44b7 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_wa/expected_message.txt @@ -0,0 +1,6 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +Too large difference. 
+ Judge: 1 + User: 1.4 + Difference: -4.000000e-01 + (abs tol 1.000000e-01 rel tol -1.000000e+00) diff --git a/tests/default_validator_tests/test_float_abs_tol_wa/judge.ans b/tests/default_validator_tests/test_float_abs_tol_wa/judge.ans new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_wa/judge.ans @@ -0,0 +1 @@ +1 diff --git a/tests/default_validator_tests/test_float_abs_tol_wa/user.out b/tests/default_validator_tests/test_float_abs_tol_wa/user.out new file mode 100644 index 00000000..c068b244 --- /dev/null +++ b/tests/default_validator_tests/test_float_abs_tol_wa/user.out @@ -0,0 +1 @@ +1.4 diff --git a/tests/default_validator_tests/test_float_inf_judge_wa/args.txt b/tests/default_validator_tests/test_float_inf_judge_wa/args.txt new file mode 100644 index 00000000..63f54974 --- /dev/null +++ b/tests/default_validator_tests/test_float_inf_judge_wa/args.txt @@ -0,0 +1 @@ +float_tolerance 0.5 diff --git a/tests/default_validator_tests/test_float_inf_judge_wa/expected_exit_code.txt b/tests/default_validator_tests/test_float_inf_judge_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_float_inf_judge_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_float_inf_judge_wa/expected_message.txt b/tests/default_validator_tests/test_float_inf_judge_wa/expected_message.txt new file mode 100644 index 00000000..6ff0270d --- /dev/null +++ b/tests/default_validator_tests/test_float_inf_judge_wa/expected_message.txt @@ -0,0 +1,4 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +String tokens mismatch +Judge: "inf" +User: "0" diff --git a/tests/default_validator_tests/test_float_inf_judge_wa/judge.ans b/tests/default_validator_tests/test_float_inf_judge_wa/judge.ans new file mode 100644 index 00000000..8484d062 --- /dev/null +++ 
b/tests/default_validator_tests/test_float_inf_judge_wa/judge.ans @@ -0,0 +1 @@ +inf diff --git a/tests/default_validator_tests/test_float_inf_judge_wa/user.out b/tests/default_validator_tests/test_float_inf_judge_wa/user.out new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/tests/default_validator_tests/test_float_inf_judge_wa/user.out @@ -0,0 +1 @@ +0 diff --git a/tests/default_validator_tests/test_float_rel_tol_ac/args.txt b/tests/default_validator_tests/test_float_rel_tol_ac/args.txt new file mode 100644 index 00000000..aa3aff07 --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_ac/args.txt @@ -0,0 +1 @@ +float_relative_tolerance 0.1 diff --git a/tests/default_validator_tests/test_float_rel_tol_ac/expected_exit_code.txt b/tests/default_validator_tests/test_float_rel_tol_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git a/tests/default_validator_tests/test_float_rel_tol_ac/judge.ans b/tests/default_validator_tests/test_float_rel_tol_ac/judge.ans new file mode 100644 index 00000000..29d6383b --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_ac/judge.ans @@ -0,0 +1 @@ +100 diff --git a/tests/default_validator_tests/test_float_rel_tol_ac/user.out b/tests/default_validator_tests/test_float_rel_tol_ac/user.out new file mode 100644 index 00000000..f96ac067 --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_ac/user.out @@ -0,0 +1 @@ +105 diff --git a/tests/default_validator_tests/test_float_rel_tol_wa/args.txt b/tests/default_validator_tests/test_float_rel_tol_wa/args.txt new file mode 100644 index 00000000..f8a80c5a --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_wa/args.txt @@ -0,0 +1 @@ +float_relative_tolerance 0.01 diff --git a/tests/default_validator_tests/test_float_rel_tol_wa/expected_exit_code.txt 
b/tests/default_validator_tests/test_float_rel_tol_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_float_rel_tol_wa/expected_message.txt b/tests/default_validator_tests/test_float_rel_tol_wa/expected_message.txt new file mode 100644 index 00000000..4917e259 --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_wa/expected_message.txt @@ -0,0 +1,6 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +Too large difference. + Judge: 100 + User: 105 + Difference: -5.000000e+00 + (abs tol -1.000000e+00 rel tol 1.000000e-02) diff --git a/tests/default_validator_tests/test_float_rel_tol_wa/judge.ans b/tests/default_validator_tests/test_float_rel_tol_wa/judge.ans new file mode 100644 index 00000000..29d6383b --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_wa/judge.ans @@ -0,0 +1 @@ +100 diff --git a/tests/default_validator_tests/test_float_rel_tol_wa/user.out b/tests/default_validator_tests/test_float_rel_tol_wa/user.out new file mode 100644 index 00000000..f96ac067 --- /dev/null +++ b/tests/default_validator_tests/test_float_rel_tol_wa/user.out @@ -0,0 +1 @@ +105 diff --git a/tests/default_validator_tests/test_float_tolerance_ac/args.txt b/tests/default_validator_tests/test_float_tolerance_ac/args.txt new file mode 100644 index 00000000..63f54974 --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_ac/args.txt @@ -0,0 +1 @@ +float_tolerance 0.5 diff --git a/tests/default_validator_tests/test_float_tolerance_ac/expected_exit_code.txt b/tests/default_validator_tests/test_float_tolerance_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git 
a/tests/default_validator_tests/test_float_tolerance_ac/judge.ans b/tests/default_validator_tests/test_float_tolerance_ac/judge.ans new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_ac/judge.ans @@ -0,0 +1 @@ +1 diff --git a/tests/default_validator_tests/test_float_tolerance_ac/user.out b/tests/default_validator_tests/test_float_tolerance_ac/user.out new file mode 100644 index 00000000..9459d4ba --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_ac/user.out @@ -0,0 +1 @@ +1.1 diff --git a/tests/default_validator_tests/test_float_tolerance_wa/args.txt b/tests/default_validator_tests/test_float_tolerance_wa/args.txt new file mode 100644 index 00000000..63f54974 --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_wa/args.txt @@ -0,0 +1 @@ +float_tolerance 0.5 diff --git a/tests/default_validator_tests/test_float_tolerance_wa/expected_exit_code.txt b/tests/default_validator_tests/test_float_tolerance_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_float_tolerance_wa/expected_message.txt b/tests/default_validator_tests/test_float_tolerance_wa/expected_message.txt new file mode 100644 index 00000000..1393c4b0 --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_wa/expected_message.txt @@ -0,0 +1,6 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +Too large difference. 
+ Judge: 1 + User: 1.50001 + Difference: -5.000100e-01 + (abs tol 5.000000e-01 rel tol 5.000000e-01) diff --git a/tests/default_validator_tests/test_float_tolerance_wa/judge.ans b/tests/default_validator_tests/test_float_tolerance_wa/judge.ans new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_wa/judge.ans @@ -0,0 +1 @@ +1 diff --git a/tests/default_validator_tests/test_float_tolerance_wa/user.out b/tests/default_validator_tests/test_float_tolerance_wa/user.out new file mode 100644 index 00000000..5c4e32e5 --- /dev/null +++ b/tests/default_validator_tests/test_float_tolerance_wa/user.out @@ -0,0 +1 @@ +1.50001 diff --git a/tests/default_validator_tests/test_simple_ac/args.txt b/tests/default_validator_tests/test_simple_ac/args.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/default_validator_tests/test_simple_ac/expected_exit_code.txt b/tests/default_validator_tests/test_simple_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_simple_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git a/tests/default_validator_tests/test_simple_ac/judge.ans b/tests/default_validator_tests/test_simple_ac/judge.ans new file mode 100644 index 00000000..3b18e512 --- /dev/null +++ b/tests/default_validator_tests/test_simple_ac/judge.ans @@ -0,0 +1 @@ +hello world diff --git a/tests/default_validator_tests/test_simple_ac/user.out b/tests/default_validator_tests/test_simple_ac/user.out new file mode 100644 index 00000000..3b18e512 --- /dev/null +++ b/tests/default_validator_tests/test_simple_ac/user.out @@ -0,0 +1 @@ +hello world diff --git a/tests/default_validator_tests/test_simple_wa/args.txt b/tests/default_validator_tests/test_simple_wa/args.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/default_validator_tests/test_simple_wa/expected_exit_code.txt 
b/tests/default_validator_tests/test_simple_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_simple_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_simple_wa/expected_message.txt b/tests/default_validator_tests/test_simple_wa/expected_message.txt new file mode 100644 index 00000000..cc2e1c70 --- /dev/null +++ b/tests/default_validator_tests/test_simple_wa/expected_message.txt @@ -0,0 +1,4 @@ +Wrong answer on line 1 of output (corresponding to line 1 in answer file) +String tokens mismatch +Judge: "world" +User: "there" diff --git a/tests/default_validator_tests/test_simple_wa/judge.ans b/tests/default_validator_tests/test_simple_wa/judge.ans new file mode 100644 index 00000000..3b18e512 --- /dev/null +++ b/tests/default_validator_tests/test_simple_wa/judge.ans @@ -0,0 +1 @@ +hello world diff --git a/tests/default_validator_tests/test_simple_wa/user.out b/tests/default_validator_tests/test_simple_wa/user.out new file mode 100644 index 00000000..c7c7da3c --- /dev/null +++ b/tests/default_validator_tests/test_simple_wa/user.out @@ -0,0 +1 @@ +hello there diff --git a/tests/default_validator_tests/test_space_sensitive_ac/args.txt b/tests/default_validator_tests/test_space_sensitive_ac/args.txt new file mode 100644 index 00000000..846a47d2 --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_ac/args.txt @@ -0,0 +1 @@ +space_change_sensitive diff --git a/tests/default_validator_tests/test_space_sensitive_ac/expected_exit_code.txt b/tests/default_validator_tests/test_space_sensitive_ac/expected_exit_code.txt new file mode 100644 index 00000000..d81cc071 --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_ac/expected_exit_code.txt @@ -0,0 +1 @@ +42 diff --git a/tests/default_validator_tests/test_space_sensitive_ac/judge.ans b/tests/default_validator_tests/test_space_sensitive_ac/judge.ans new file mode 100644 index 
00000000..54ba25fb --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_ac/judge.ans @@ -0,0 +1,2 @@ +hello + world diff --git a/tests/default_validator_tests/test_space_sensitive_ac/user.out b/tests/default_validator_tests/test_space_sensitive_ac/user.out new file mode 100644 index 00000000..54ba25fb --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_ac/user.out @@ -0,0 +1,2 @@ +hello + world diff --git a/tests/default_validator_tests/test_space_sensitive_wa/args.txt b/tests/default_validator_tests/test_space_sensitive_wa/args.txt new file mode 100644 index 00000000..846a47d2 --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_wa/args.txt @@ -0,0 +1 @@ +space_change_sensitive diff --git a/tests/default_validator_tests/test_space_sensitive_wa/expected_exit_code.txt b/tests/default_validator_tests/test_space_sensitive_wa/expected_exit_code.txt new file mode 100644 index 00000000..920a1396 --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_wa/expected_exit_code.txt @@ -0,0 +1 @@ +43 diff --git a/tests/default_validator_tests/test_space_sensitive_wa/expected_message.txt b/tests/default_validator_tests/test_space_sensitive_wa/expected_message.txt new file mode 100644 index 00000000..d11016de --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_wa/expected_message.txt @@ -0,0 +1,2 @@ +Wrong answer on line 2 of output (corresponding to line 2 in answer file) +Space change error: got 119 expected 32 diff --git a/tests/default_validator_tests/test_space_sensitive_wa/judge.ans b/tests/default_validator_tests/test_space_sensitive_wa/judge.ans new file mode 100644 index 00000000..54ba25fb --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_wa/judge.ans @@ -0,0 +1,2 @@ +hello + world diff --git a/tests/default_validator_tests/test_space_sensitive_wa/user.out b/tests/default_validator_tests/test_space_sensitive_wa/user.out new file mode 100644 index 
00000000..94954abd --- /dev/null +++ b/tests/default_validator_tests/test_space_sensitive_wa/user.out @@ -0,0 +1,2 @@ +hello +world diff --git a/tests/test_default_validator.py b/tests/test_default_validator.py new file mode 100644 index 00000000..1f1456b5 --- /dev/null +++ b/tests/test_default_validator.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +import pytest +import subprocess +import tempfile +from pathlib import Path + +# The validator executable path, resolved relative to this test file. +VALIDATOR_PATH = Path(__file__).parent.parent / 'support' / 'default_validator' / 'default_validator' +# The directory containing test cases for the validator. +TESTS_DIR = Path(__file__).parent / 'default_validator_tests' + + +@pytest.fixture(scope='session') +def validator() -> Path: + """ + A session-scoped fixture to compile the default_validator executable. + This ensures the validator is compiled only once before any tests run. + It returns the path to the compiled executable. + """ + validator_parent_dir = VALIDATOR_PATH.parent + # Compile the validator using 'make'. + try: + subprocess.run( + ['make', 'default_validator'], cwd=validator_parent_dir, check=True, capture_output=True, text=True, encoding='utf-8' + ) + except subprocess.CalledProcessError as e: + pytest.fail(f'Failed to compile default_validator: {e.stderr}', pytrace=False) + except FileNotFoundError: + pytest.fail("'make' command not found. Please ensure 'make' is installed and in your PATH.", pytrace=False) + + if not VALIDATOR_PATH.is_file(): + pytest.fail(f'Validator executable not found at {VALIDATOR_PATH} after compilation.', pytrace=False) + + return VALIDATOR_PATH + + +def discover_test_cases(): + """ + Finds and returns a list of all test case directories. + A test case directory is expected to start with 'test_'. 
+ """ + if not TESTS_DIR.is_dir(): + return [] + return [d for d in TESTS_DIR.iterdir() if d.is_dir() and d.name.startswith('test_')] + + +@pytest.mark.parametrize('test_dir', discover_test_cases(), ids=lambda d: d.name) +def test_default_validator(validator: Path, test_dir: Path): + """ + Runs a single validator test case. + The test is parametrized to run for each directory discovered by `discover_test_cases`. + """ + judge_ans = test_dir / 'judge.ans' + user_out = test_dir / 'user.out' + args_file = test_dir / 'args.txt' + expected_exit_code_file = test_dir / 'expected_exit_code.txt' + expected_message_file = test_dir / 'expected_message.txt' + + assert judge_ans.is_file(), f"'judge.ans' not found in {test_dir}" + assert user_out.is_file(), f"'user.out' not found in {test_dir}" + assert expected_exit_code_file.is_file(), f"'expected_exit_code.txt' not found in {test_dir}" + + args = [] + if args_file.is_file(): + args_text = args_file.read_text(encoding='utf-8').strip() + if args_text: + args = args_text.split() + + expected_exit_code = int(expected_exit_code_file.read_text(encoding='utf-8').strip()) + + with tempfile.TemporaryDirectory() as feedback_dir_str, open(user_out, 'r', encoding='utf-8') as user_out_f: + feedback_dir = Path(feedback_dir_str) + # The validator expects judge_in, judge_ans, feedback_dir. + # judge_in is not currently used by the validator for comparison, so we pass a dummy file. + with tempfile.NamedTemporaryFile() as dummy_judge_in: + cmd = [str(validator), str(dummy_judge_in.name), str(judge_ans), str(feedback_dir), *args] + + result = subprocess.run(cmd, stdin=user_out_f, capture_output=True, text=True, encoding='utf-8') + + assert result.returncode == expected_exit_code, f'Wrong exit code. Stderr: {result.stderr}' + + judgemessage_path = feedback_dir / 'judgemessage.txt' + if expected_message_file.is_file(): + assert judgemessage_path.is_file(), "'judgemessage.txt' was not created but was expected." 
+ actual_message = judgemessage_path.read_text(encoding='utf-8').strip() + expected_message = expected_message_file.read_text(encoding='utf-8').strip() + assert actual_message == expected_message, 'The validation message did not match the expected message.' + else: + # If no message is expected, assert that no message was generated. + if judgemessage_path.is_file(): + actual_message = judgemessage_path.read_text(encoding='utf-8').strip() + assert not actual_message, f'A validation message was generated but none was expected: {actual_message}' From 0cb830301da57b3add8fc12d9233addf3f0bfc44 Mon Sep 17 00:00:00 2001 From: Gunnar Kreitz Date: Mon, 17 Nov 2025 15:44:09 +0100 Subject: [PATCH 2/2] Deal with binary user output and messages in default_validator tests --- .../default_validator_tests/run_and_generate_expected.py | 6 +++--- tests/test_default_validator.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/default_validator_tests/run_and_generate_expected.py b/tests/default_validator_tests/run_and_generate_expected.py index 031badc4..6323fdfb 100755 --- a/tests/default_validator_tests/run_and_generate_expected.py +++ b/tests/default_validator_tests/run_and_generate_expected.py @@ -76,7 +76,7 @@ def main(): with tempfile.TemporaryDirectory() as feedback_dir: # The validator expects judge_in, judge_ans, feedback_dir # judge_in is not used by the validator for comparison, so we can pass a dummy file. 
- with tempfile.NamedTemporaryFile() as dummy_judge_in, open(user_out, 'r', encoding='utf-8') as user_out_f: + with tempfile.NamedTemporaryFile() as dummy_judge_in, open(user_out, 'rb') as user_out_f: cmd = [str(validator_path), str(dummy_judge_in.name), str(judge_ans), feedback_dir, *validator_args] result = subprocess.run(cmd, stdin=user_out_f, capture_output=True, text=True, encoding='utf-8') @@ -88,9 +88,9 @@ def main(): # Write expected_message.txt if a message was generated judgemessage_path = Path(feedback_dir) / 'judgemessage.txt' if judgemessage_path.is_file(): - message = judgemessage_path.read_text(encoding='utf-8') + message = judgemessage_path.read_bytes() if message: - (test_dir / 'expected_message.txt').write_text(message, encoding='utf-8') + (test_dir / 'expected_message.txt').write_bytes(message) print('Wrote message to expected_message.txt') else: # If there's no message, we should remove any existing expected_message.txt diff --git a/tests/test_default_validator.py b/tests/test_default_validator.py index 1f1456b5..6623dccb 100644 --- a/tests/test_default_validator.py +++ b/tests/test_default_validator.py @@ -68,7 +68,7 @@ def test_default_validator(validator: Path, test_dir: Path): expected_exit_code = int(expected_exit_code_file.read_text(encoding='utf-8').strip()) - with tempfile.TemporaryDirectory() as feedback_dir_str, open(user_out, 'r', encoding='utf-8') as user_out_f: + with tempfile.TemporaryDirectory() as feedback_dir_str, open(user_out, 'rb') as user_out_f: feedback_dir = Path(feedback_dir_str) # The validator expects judge_in, judge_ans, feedback_dir. # judge_in is not currently used by the validator for comparison, so we pass a dummy file. @@ -82,11 +82,11 @@ def test_default_validator(validator: Path, test_dir: Path): judgemessage_path = feedback_dir / 'judgemessage.txt' if expected_message_file.is_file(): assert judgemessage_path.is_file(), "'judgemessage.txt' was not created but was expected." 
- actual_message = judgemessage_path.read_text(encoding='utf-8').strip() - expected_message = expected_message_file.read_text(encoding='utf-8').strip() + actual_message = judgemessage_path.read_bytes() + expected_message = expected_message_file.read_bytes() assert actual_message == expected_message, 'The validation message did not match the expected message.' else: # If no message is expected, assert that no message was generated. if judgemessage_path.is_file(): - actual_message = judgemessage_path.read_text(encoding='utf-8').strip() + actual_message = judgemessage_path.read_bytes() assert not actual_message, f'A validation message was generated but none was expected: {actual_message}'