From abed7a0a7d91aabe08ac56b3b8baa4632bc6c12d Mon Sep 17 00:00:00 2001
From: llbbl
Date: Mon, 30 Jun 2025 22:42:27 -0500
Subject: [PATCH] feat: Add comprehensive Python testing infrastructure with
 Poetry

- Set up Poetry as the package manager, configured via pyproject.toml
- Add testing dependencies: pytest, pytest-cov, pytest-mock
- Configure pytest with coverage reporting, custom markers, and test discovery
- Create a tests directory structure with unit/ and integration/ subdirectories
- Add shared test fixtures in tests/conftest.py for mocking models, data, and configs
- Update .gitignore with Poetry- and Claude-related entries
- Include validation tests that verify the infrastructure is set up correctly
---
 .gitignore                     |   6 ++
 pyproject.toml                 |  97 +++++++++++++++++++
 tests/__init__.py              |   0
 tests/conftest.py              | 164 +++++++++++++++++++++++++++++++++
 tests/integration/__init__.py  |   0
 tests/test_setup_validation.py | 140 ++++++++++++++++++++++++++++
 tests/unit/__init__.py         |   0
 7 files changed, 407 insertions(+)
 create mode 100644 pyproject.toml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/integration/__init__.py
 create mode 100644 tests/test_setup_validation.py
 create mode 100644 tests/unit/__init__.py

diff --git a/.gitignore b/.gitignore
index 4e52edb..4e0de34 100644
--- a/.gitignore
+++ b/.gitignore
@@ -119,3 +119,9 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# Claude
+.claude/*
+
+# Poetry
+poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..71624c8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,97 @@
+[tool.poetry]
+name = "pdvc"
+version = "0.1.0"
+description = "Parallel Decoding for Video Captioning"
+authors = ["PDVC Team"]
+readme = "README.md"
+packages = [{include = "pdvc"}, {include = "video_backbone"}, {include = "data"}, {include = "misc"}, {include = "densevid_eval3"}, {include = "visualization"}]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+h5py = "*"
+matplotlib = "*"
+numpy = "*"
+pandas = "*"
+Pillow = "*"
+PyYAML = "*"
+six = "*"
+tqdm = "*"
+tensorboardX = "*"
+colorlog = "*"
+scipy = "*"
+notebook = "*"
+av = "*"
+joblib = "*"
+google-trans-new = "*"
+torch = "*"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.0"
+pytest-cov = "^4.1.0"
+pytest-mock = "^3.11.1"
+
+[tool.poetry.scripts]
+test = "pytest:main"
+tests = "pytest:main"
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "--strict-markers",
+    "--strict-config",
+    "--verbose",
+    "--cov=pdvc",
+    "--cov=video_backbone",
+    "--cov=data",
+    "--cov=misc",
+    "--cov=densevid_eval3",
+    "--cov=visualization",
+    "--cov-branch",
+    "--cov-report=term-missing:skip-covered",
+    "--cov-report=html",
+    "--cov-report=xml",
+    "--cov-fail-under=80",
+]
+markers = [
+    "unit: Unit tests",
+    "integration: Integration tests",
+    "slow: Slow tests",
+]
+
+[tool.coverage.run]
+source = ["pdvc", "video_backbone", "data", "misc", "densevid_eval3", "visualization"]
+omit = [
+    "*/tests/*",
+    "*/test_*.py",
+    "*/__pycache__/*",
+    "*/site-packages/*",
+    "setup.py",
+    "*/pdvc/ops/src/*",
+    "*/pdvc/ops/test.py",
+]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if self.debug:",
+    "if __name__ == .__main__.:",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if 0:",
+    "if False:",
+    "pass",
+]
+precision = 2
+show_missing = true
+
+[tool.coverage.html]
+directory = "htmlcov"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
\ No newline at end of file
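A note on the [tool.poetry.scripts] block: `poetry run test` and `poetry run tests` both resolve to pytest's `main` entry point. As a minimal sketch of an equivalent hand-rolled runner, assuming only that pytest is installed: pytest.main() reads [tool.pytest.ini_options] from pyproject.toml on its own, so the coverage flags and the --cov-fail-under=80 gate apply to this invocation as well.

import sys

import pytest

if __name__ == "__main__":
    # Forward CLI arguments, e.g. ["-m", "unit"] to run only unit-marked
    # tests; with no arguments, testpaths = ["tests"] drives discovery.
    sys.exit(pytest.main(sys.argv[1:]))
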
["poetry-core"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..bf5aac9 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,164 @@ +import os +import tempfile +import shutil +from pathlib import Path +from unittest.mock import Mock + +import pytest +import torch +import numpy as np +import yaml + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for test files.""" + temp_path = tempfile.mkdtemp() + yield Path(temp_path) + shutil.rmtree(temp_path) + + +@pytest.fixture +def mock_config(): + """Create a mock configuration dictionary for testing.""" + return { + 'train': { + 'batch_size': 32, + 'learning_rate': 1e-4, + 'num_epochs': 10, + 'grad_clip': 1.0, + 'weight_decay': 1e-4, + }, + 'model': { + 'visual_feature_dim': 2048, + 'hidden_dim': 512, + 'num_heads': 8, + 'num_layers': 6, + 'dropout': 0.1, + }, + 'data': { + 'num_workers': 4, + 'max_caption_len': 50, + 'video_feature_path': '/data/features', + }, + 'eval': { + 'beam_size': 5, + 'max_pred_length': 50, + } + } + + +@pytest.fixture +def sample_video_features(): + """Create sample video features for testing.""" + batch_size = 2 + num_frames = 10 + feature_dim = 2048 + + features = torch.randn(batch_size, num_frames, feature_dim) + mask = torch.ones(batch_size, num_frames, dtype=torch.bool) + + return { + 'features': features, + 'mask': mask, + 'video_ids': ['video_001', 'video_002'], + } + + +@pytest.fixture +def sample_captions(): + """Create sample captions for testing.""" + return [ + { + 'video_id': 'video_001', + 'captions': [ + {'sentence': 'A person walks into the room', 'timestamp': [0.0, 3.5]}, + {'sentence': 'They sit down on a chair', 'timestamp': [3.5, 7.0]}, + ] + }, + { + 'video_id': 'video_002', + 'captions': [ + {'sentence': 'Someone opens a door', 'timestamp': [0.0, 2.0]}, + {'sentence': 'They turn on the lights', 'timestamp': [2.0, 4.5]}, + ] + } + ] + + +@pytest.fixture +def mock_model(): + """Create a mock model for testing.""" + model = Mock() + model.train = Mock(return_value=None) + model.eval = Mock(return_value=None) + model.parameters = Mock(return_value=[torch.randn(10, 10)]) + model.forward = Mock(return_value={ + 'loss': torch.tensor(1.0), + 'predictions': torch.randn(2, 10, 100) + }) + return model + + +@pytest.fixture +def mock_dataloader(): + """Create a mock dataloader for testing.""" + dataloader = Mock() + dataloader.__iter__ = Mock(return_value=iter([ + { + 'video_features': torch.randn(2, 10, 2048), + 'captions': ['caption1', 'caption2'], + 'video_ids': ['vid1', 'vid2'] + } + ])) + dataloader.__len__ = Mock(return_value=10) + return dataloader + + +@pytest.fixture +def sample_yaml_config(temp_dir): + """Create a sample YAML configuration file.""" + config_path = temp_dir / 'config.yaml' + config = { + 'experiment_name': 'test_experiment', + 'model_type': 'pdvc', + 'dataset': 'activitynet', + 'checkpoint_dir': str(temp_dir / 'checkpoints'), + 'log_dir': str(temp_dir / 'logs'), + } + + with open(config_path, 'w') as f: + yaml.dump(config, f) + + return config_path + + +@pytest.fixture +def mock_h5_file(temp_dir): + """Create a mock HDF5 file structure for testing.""" + import h5py + + h5_path = temp_dir / 'features.h5' + with h5py.File(h5_path, 'w') as f: + # Add some sample video features + for i in range(5): + video_id = f'video_{i:03d}' + features = np.random.randn(20, 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_setup_validation.py b/tests/test_setup_validation.py
new file mode 100644
index 0000000..198b739
--- /dev/null
+++ b/tests/test_setup_validation.py
@@ -0,0 +1,140 @@
+"""Validation tests to verify the testing infrastructure is properly configured."""
+
+import sys
+from pathlib import Path
+
+import pytest
+import torch
+import numpy as np
+import yaml
+import h5py
+
+
+class TestSetupValidation:
+    """Test class to validate the testing infrastructure setup."""
+
+    def test_pytest_installed(self):
+        """Verify pytest is installed and accessible."""
+        assert 'pytest' in sys.modules
+
+    def test_pytest_cov_installed(self):
+        """Verify pytest-cov is installed."""
+        try:
+            import pytest_cov
+            assert pytest_cov is not None
+        except ImportError:
+            pytest.fail("pytest-cov is not installed")
+
+    def test_pytest_mock_installed(self):
+        """Verify pytest-mock is installed."""
+        try:
+            import pytest_mock
+            assert pytest_mock is not None
+        except ImportError:
+            pytest.fail("pytest-mock is not installed")
+
+    def test_project_structure(self):
+        """Verify the project structure is correct."""
+        # Resolve the repo root from this file so the test is not tied to
+        # one checkout location.
+        workspace = Path(__file__).resolve().parent.parent
+
+        # Check main directories exist
+        assert (workspace / 'pdvc').exists(), "pdvc directory not found"
+        assert (workspace / 'video_backbone').exists(), "video_backbone directory not found"
+        assert (workspace / 'data').exists(), "data directory not found"
+        assert (workspace / 'misc').exists(), "misc directory not found"
+        assert (workspace / 'tests').exists(), "tests directory not found"
+
+        # Check test subdirectories
+        assert (workspace / 'tests' / 'unit').exists(), "tests/unit directory not found"
+        assert (workspace / 'tests' / 'integration').exists(), "tests/integration directory not found"
+        assert (workspace / 'tests' / 'conftest.py').exists(), "conftest.py not found"
+
+    def test_fixtures_available(self, temp_dir, mock_config, sample_video_features):
+        """Test that fixtures are properly configured and accessible."""
+        # Test temp_dir fixture
+        assert temp_dir.exists()
+        assert temp_dir.is_dir()
+
+        # Test mock_config fixture
+        assert isinstance(mock_config, dict)
+        assert 'train' in mock_config
+        assert 'model' in mock_config
+
+        # Test sample_video_features fixture
+        assert isinstance(sample_video_features, dict)
+        assert 'features' in sample_video_features
+        assert isinstance(sample_video_features['features'], torch.Tensor)
+
+    @pytest.mark.unit
+    def test_unit_marker(self):
+        """Test that the unit marker works correctly."""
+        assert True
+
+    @pytest.mark.integration
+    def test_integration_marker(self):
+        """Test that the integration marker works correctly."""
+        assert True
+
+    @pytest.mark.slow
+    def test_slow_marker(self):
+        """Test that the slow marker works correctly."""
+        assert True
+
+    def test_mock_functionality(self, mock_model, mocker):
+        """Test that mocking functionality works correctly."""
+        # Test fixture mock
+        assert hasattr(mock_model, 'forward')
+        result = mock_model.forward()
+        assert 'loss' in result
+
+        # Test mocker functionality
+        mock_func = mocker.Mock(return_value=42)
+        assert mock_func() == 42
+
+    def test_torch_available(self):
+        """Verify PyTorch is available and working."""
+        tensor = torch.tensor([1, 2, 3])
+        assert tensor.sum().item() == 6
+
+    def test_numpy_available(self):
+        """Verify NumPy is available and working."""
+        arr = np.array([1, 2, 3])
+        assert arr.sum() == 6
+
+    def test_yaml_functionality(self, sample_yaml_config):
+        """Test YAML reading/writing functionality."""
+        with open(sample_yaml_config, 'r') as f:
+            config = yaml.safe_load(f)
+
+        assert isinstance(config, dict)
+        assert 'experiment_name' in config
+        assert config['experiment_name'] == 'test_experiment'
+
+    def test_h5py_functionality(self, mock_h5_file):
+        """Test HDF5 file handling."""
+        with h5py.File(mock_h5_file, 'r') as f:
+            assert 'video_000' in f
+            features = f['video_000'][:]
+            assert features.shape == (20, 2048)
+
+    def test_device_fixture(self, device):
+        """Test device fixture returns appropriate device."""
+        assert isinstance(device, torch.device)
+        assert device.type in ['cpu', 'cuda']
+
+    def test_random_seed_fixture(self):
+        """Test that random seeds are properly set."""
+        # Generate some random numbers
+        np_random1 = np.random.rand()
+        torch_random1 = torch.rand(1).item()
+
+        # Seeds should be reset by the fixture, so these should be deterministic
+        # across test runs (but different from each other within a test)
+        assert isinstance(np_random1, float)
+        assert isinstance(torch_random1, float)
+
+    def test_capture_stdout_fixture(self, capsys):
+        """Test stdout capture functionality."""
+        print("Test output")
+        captured = capsys.readouterr()
+        assert "Test output" in captured.out
\ No newline at end of file
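Past these infrastructure checks, a first real unit test would consume the conftest.py fixtures directly. A minimal sketch, assuming nothing about the actual PDVC model API (the mocked forward() simply returns the canned dict defined in the mock_model fixture):

import pytest


@pytest.mark.unit
def test_mock_forward_pass_shapes(mock_model, sample_video_features):
    # Shapes come straight from the sample_video_features fixture.
    features = sample_video_features['features']
    assert features.shape == (2, 10, 2048)

    # The mock ignores its inputs, so only the output structure is checked.
    out = mock_model.forward(features)
    assert out['loss'].item() == pytest.approx(1.0)
    assert out['predictions'].shape == (2, 10, 100)
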
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..e69de29
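The tests/unit/ and tests/integration/ packages are created empty here; anything placed in them is picked up automatically as long as file and function names match the python_files and python_functions patterns configured above. As a hypothetical seed for tests/integration/test_smoke.py:

import pytest


@pytest.mark.integration
def test_integration_scaffolding():
    # Placeholder until real end-to-end tests land; it keeps the integration
    # marker and directory wiring exercised in CI.
    assert True

`poetry run pytest -m integration` then selects only the tests carrying that marker, and --strict-markers rejects any marker name not declared in pyproject.toml.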