From b218febbda6e73ee0acc202cfe871c7a31ba2fbd Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Mon, 20 May 2024 16:59:45 +0000
Subject: [PATCH] [datasets] Prevent access to files outside dataset root.

---
Review notes (ignored by `git am`): the containment check compares whole
path components via os.path.commonpath() instead of a raw string prefix,
so a sibling directory such as `<root>-other/` is rejected as well. The
error message reports the full URI, which is what the new test matches on.

 compiler_gym/datasets/files_dataset.py |  6 ++++++
 tests/datasets/files_dataset_test.py   | 12 ++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/compiler_gym/datasets/files_dataset.py b/compiler_gym/datasets/files_dataset.py
index 8fc5f8bae..80a5f8dd6 100644
--- a/compiler_gym/datasets/files_dataset.py
+++ b/compiler_gym/datasets/files_dataset.py
@@ -122,6 +122,12 @@ def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
                 f"{self.dataset_root}/{uri.path}{self.benchmark_file_suffix}"
             )
         )
+        # Limit file access to within dataset root, e.g. error on
+        # `../../parent.txt`. Compare whole path components: a plain string
+        # prefix test would also accept a sibling such as `<root>-other/`.
+        root = os.path.realpath(self.dataset_root)
+        if os.path.commonpath([root, os.path.realpath(path)]) != root:
+            raise LookupError(f"Invalid URL: {uri}")
         if not path.is_file():
             raise LookupError(f"Benchmark not found: {uri} (file not found: {path})")
         return self.benchmark_class.from_file(uri, path)
diff --git a/tests/datasets/files_dataset_test.py b/tests/datasets/files_dataset_test.py
index 93a545dff..8b1ddab0e 100644
--- a/tests/datasets/files_dataset_test.py
+++ b/tests/datasets/files_dataset_test.py
@@ -102,6 +102,18 @@ def test_populated_dataset_benchmark_lookup_not_found(populated_dataset: FilesDa
         populated_dataset.benchmark("benchmark://test-v0/not/a/file")
 
 
+@pytest.mark.parametrize("bad_path", (
+    "../../file.txt",
+    "subdir/../../../../file.txt",
+))
+def test_populated_dataset_benchmark_lookup_parent_directory(populated_dataset: FilesDataset, bad_path: str):
+    """Paths that escape the dataset root via `..` must raise LookupError."""
+    with pytest.raises(
+        LookupError, match=f"Invalid URL: benchmark://test-v0/{bad_path}"
+    ):
+        populated_dataset.benchmark(f"benchmark://test-v0/{bad_path}")
+
+
 def test_populated_dataset_with_file_extension_filter(populated_dataset: FilesDataset):
     populated_dataset.benchmark_file_suffix = ".jpg"
     assert list(populated_dataset.benchmark_uris()) == [