From abed7a0a7d91aabe08ac56b3b8baa4632bc6c12d Mon Sep 17 00:00:00 2001
From: llbbl
Date: Mon, 30 Jun 2025 22:42:27 -0500
Subject: [PATCH] feat: Add comprehensive Python testing infrastructure with
 Poetry

- Set up Poetry as the package manager, configured via pyproject.toml
- Add testing dependencies: pytest, pytest-cov, pytest-mock
- Configure pytest with coverage reporting, custom markers, and test discovery
- Create a tests directory structure with unit/ and integration/ subdirectories
- Add shared test fixtures in tests/conftest.py for mocking models, data, and configs
- Update .gitignore with Poetry- and Claude-related entries
- Include validation tests that verify the infrastructure is set up correctly
---
 .gitignore                     |   6 ++
 pyproject.toml                 |  97 +++++++++++++++++++
 tests/__init__.py              |   0
 tests/conftest.py              | 164 +++++++++++++++++++++++++++++++++
 tests/integration/__init__.py  |   0
 tests/test_setup_validation.py | 140 ++++++++++++++++++++++++++++
 tests/unit/__init__.py         |   0
 7 files changed, 407 insertions(+)
 create mode 100644 pyproject.toml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/integration/__init__.py
 create mode 100644 tests/test_setup_validation.py
 create mode 100644 tests/unit/__init__.py

diff --git a/.gitignore b/.gitignore
index 4e52edb..4e0de34 100644
--- a/.gitignore
+++ b/.gitignore
@@ -119,3 +119,9 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# Claude
+.claude/*
+
+# Poetry
+poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..71624c8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,97 @@
+[tool.poetry]
+name = "pdvc"
+version = "0.1.0"
+description = "Parallel Decoding for Video Captioning"
+authors = ["PDVC Team"]
+readme = "README.md"
+packages = [{include = "pdvc"}, {include = "video_backbone"}, {include = "data"}, {include = "misc"}, {include = "densevid_eval3"}, {include = "visualization"}]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+h5py = "*"
+matplotlib = "*"
+numpy = "*"
+pandas = "*"
+Pillow = "*"
+PyYAML = "*"
+six = "*"
+tqdm = "*"
+tensorboardX = "*"
+colorlog = "*"
+scipy = "*"
+notebook = "*"
+av = "*"
+joblib = "*"
+google-trans-new = "*"
+torch = "*"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.0"
+pytest-cov = "^4.1.0"
+pytest-mock = "^3.11.1"
+
+[tool.poetry.scripts]
+test = "pytest:main"
+tests = "pytest:main"
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "--strict-markers",
+    "--strict-config",
+    "--verbose",
+    "--cov=pdvc",
+    "--cov=video_backbone",
+    "--cov=data",
+    "--cov=misc",
+    "--cov=densevid_eval3",
+    "--cov=visualization",
+    "--cov-branch",
+    "--cov-report=term-missing:skip-covered",
+    "--cov-report=html",
+    "--cov-report=xml",
+    "--cov-fail-under=80",
+]
+markers = [
+    "unit: Unit tests",
+    "integration: Integration tests",
+    "slow: Slow tests",
+]
+
+[tool.coverage.run]
+source = ["pdvc", "video_backbone", "data", "misc", "densevid_eval3", "visualization"]
+omit = [
+    "*/tests/*",
+    "*/test_*.py",
+    "*/__pycache__/*",
+    "*/site-packages/*",
+    "setup.py",
+    "*/pdvc/ops/src/*",
+    "*/pdvc/ops/test.py",
+]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if self.debug:",
+    "if __name__ == .__main__.:",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if 0:",
+    "if False:",
+    "pass",
+]
+precision = 2
+show_missing = true
+
+[tool.coverage.html]
+directory = "htmlcov"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
\ No newline at end of file
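A note on the [tool.poetry.scripts] block: `poetry run test` and `poetry run tests` both resolve to pytest's `main` entry point. As a minimal sketch of an equivalent hand-rolled runner, assuming only that pytest is installed: pytest.main() reads [tool.pytest.ini_options] from pyproject.toml on its own, so the coverage flags and the --cov-fail-under=80 gate apply to this invocation as well.

import sys

import pytest

if __name__ == "__main__":
    # Forward CLI arguments, e.g. ["-m", "unit"] to run only unit-marked
    # tests; with no arguments, testpaths = ["tests"] drives discovery.
    sys.exit(pytest.main(sys.argv[1:]))
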
["poetry-core"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..bf5aac9 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,164 @@ +import os +import tempfile +import shutil +from pathlib import Path +from unittest.mock import Mock + +import pytest +import torch +import numpy as np +import yaml + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for test files.""" + temp_path = tempfile.mkdtemp() + yield Path(temp_path) + shutil.rmtree(temp_path) + + +@pytest.fixture +def mock_config(): + """Create a mock configuration dictionary for testing.""" + return { + 'train': { + 'batch_size': 32, + 'learning_rate': 1e-4, + 'num_epochs': 10, + 'grad_clip': 1.0, + 'weight_decay': 1e-4, + }, + 'model': { + 'visual_feature_dim': 2048, + 'hidden_dim': 512, + 'num_heads': 8, + 'num_layers': 6, + 'dropout': 0.1, + }, + 'data': { + 'num_workers': 4, + 'max_caption_len': 50, + 'video_feature_path': '/data/features', + }, + 'eval': { + 'beam_size': 5, + 'max_pred_length': 50, + } + } + + +@pytest.fixture +def sample_video_features(): + """Create sample video features for testing.""" + batch_size = 2 + num_frames = 10 + feature_dim = 2048 + + features = torch.randn(batch_size, num_frames, feature_dim) + mask = torch.ones(batch_size, num_frames, dtype=torch.bool) + + return { + 'features': features, + 'mask': mask, + 'video_ids': ['video_001', 'video_002'], + } + + +@pytest.fixture +def sample_captions(): + """Create sample captions for testing.""" + return [ + { + 'video_id': 'video_001', + 'captions': [ + {'sentence': 'A person walks into the room', 'timestamp': [0.0, 3.5]}, + {'sentence': 'They sit down on a chair', 'timestamp': [3.5, 7.0]}, + ] + }, + { + 'video_id': 'video_002', + 'captions': [ + {'sentence': 'Someone opens a door', 'timestamp': [0.0, 2.0]}, + {'sentence': 'They turn on the lights', 'timestamp': [2.0, 4.5]}, + ] + } + ] + + +@pytest.fixture +def mock_model(): + """Create a mock model for testing.""" + model = Mock() + model.train = Mock(return_value=None) + model.eval = Mock(return_value=None) + model.parameters = Mock(return_value=[torch.randn(10, 10)]) + model.forward = Mock(return_value={ + 'loss': torch.tensor(1.0), + 'predictions': torch.randn(2, 10, 100) + }) + return model + + +@pytest.fixture +def mock_dataloader(): + """Create a mock dataloader for testing.""" + dataloader = Mock() + dataloader.__iter__ = Mock(return_value=iter([ + { + 'video_features': torch.randn(2, 10, 2048), + 'captions': ['caption1', 'caption2'], + 'video_ids': ['vid1', 'vid2'] + } + ])) + dataloader.__len__ = Mock(return_value=10) + return dataloader + + +@pytest.fixture +def sample_yaml_config(temp_dir): + """Create a sample YAML configuration file.""" + config_path = temp_dir / 'config.yaml' + config = { + 'experiment_name': 'test_experiment', + 'model_type': 'pdvc', + 'dataset': 'activitynet', + 'checkpoint_dir': str(temp_dir / 'checkpoints'), + 'log_dir': str(temp_dir / 'logs'), + } + + with open(config_path, 'w') as f: + yaml.dump(config, f) + + return config_path + + +@pytest.fixture +def mock_h5_file(temp_dir): + """Create a mock HDF5 file structure for testing.""" + import h5py + + h5_path = temp_dir / 'features.h5' + with h5py.File(h5_path, 'w') as f: + # Add some sample video features + for i in range(5): + video_id = f'video_{i:03d}' + features = np.random.randn(20, 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_setup_validation.py b/tests/test_setup_validation.py
new file mode 100644
index 0000000..198b739
--- /dev/null
+++ b/tests/test_setup_validation.py
@@ -0,0 +1,140 @@
+"""Validation tests to verify the testing infrastructure is properly configured."""
+
+import sys
+from pathlib import Path
+
+import pytest
+import torch
+import numpy as np
+import yaml
+import h5py
+
+
+class TestSetupValidation:
+    """Test class to validate the testing infrastructure setup."""
+
+    def test_pytest_installed(self):
+        """Verify pytest is installed and accessible."""
+        assert 'pytest' in sys.modules
+
+    def test_pytest_cov_installed(self):
+        """Verify pytest-cov is installed."""
+        try:
+            import pytest_cov
+            assert pytest_cov is not None
+        except ImportError:
+            pytest.fail("pytest-cov is not installed")
+
+    def test_pytest_mock_installed(self):
+        """Verify pytest-mock is installed."""
+        try:
+            import pytest_mock
+            assert pytest_mock is not None
+        except ImportError:
+            pytest.fail("pytest-mock is not installed")
+
+    def test_project_structure(self):
+        """Verify the project structure is correct."""
+        # Resolve the repo root from this file so the test is not tied to
+        # one checkout location.
+        workspace = Path(__file__).resolve().parent.parent
+
+        # Check main directories exist
+        assert (workspace / 'pdvc').exists(), "pdvc directory not found"
+        assert (workspace / 'video_backbone').exists(), "video_backbone directory not found"
+        assert (workspace / 'data').exists(), "data directory not found"
+        assert (workspace / 'misc').exists(), "misc directory not found"
+        assert (workspace / 'tests').exists(), "tests directory not found"
+
+        # Check test subdirectories
+        assert (workspace / 'tests' / 'unit').exists(), "tests/unit directory not found"
+        assert (workspace / 'tests' / 'integration').exists(), "tests/integration directory not found"
+        assert (workspace / 'tests' / 'conftest.py').exists(), "conftest.py not found"
+
+    def test_fixtures_available(self, temp_dir, mock_config, sample_video_features):
+        """Test that fixtures are properly configured and accessible."""
+        # Test temp_dir fixture
+        assert temp_dir.exists()
+        assert temp_dir.is_dir()
+
+        # Test mock_config fixture
+        assert isinstance(mock_config, dict)
+        assert 'train' in mock_config
+        assert 'model' in mock_config
+
+        # Test sample_video_features fixture
+        assert isinstance(sample_video_features, dict)
+        assert 'features' in sample_video_features
+        assert isinstance(sample_video_features['features'], torch.Tensor)
+
+    @pytest.mark.unit
+    def test_unit_marker(self):
+        """Test that the unit marker works correctly."""
+        assert True
+
+    @pytest.mark.integration
+    def test_integration_marker(self):
+        """Test that the integration marker works correctly."""
+        assert True
+
+    @pytest.mark.slow
+    def test_slow_marker(self):
+        """Test that the slow marker works correctly."""
+        assert True
+
+    def test_mock_functionality(self, mock_model, mocker):
+        """Test that mocking functionality works correctly."""
+        # Test fixture mock
+        assert hasattr(mock_model, 'forward')
+        result = mock_model.forward()
+        assert 'loss' in result
+
+        # Test mocker functionality
+        mock_func = mocker.Mock(return_value=42)
+        assert mock_func() == 42
+
+    def test_torch_available(self):
+        """Verify PyTorch is available and working."""
+        tensor = torch.tensor([1, 2, 3])
+        assert tensor.sum().item() == 6
+
+    def test_numpy_available(self):
+        """Verify NumPy is available and working."""
+        arr = np.array([1, 2, 3])
+        assert arr.sum() == 6
+
+    def test_yaml_functionality(self, sample_yaml_config):
+        """Test YAML reading/writing functionality."""
+        with open(sample_yaml_config, 'r') as f:
+            config = yaml.safe_load(f)
+
+        assert isinstance(config, dict)
+        assert 'experiment_name' in config
+        assert config['experiment_name'] == 'test_experiment'
+
+    def test_h5py_functionality(self, mock_h5_file):
+        """Test HDF5 file handling."""
+        with h5py.File(mock_h5_file, 'r') as f:
+            assert 'video_000' in f
+            features = f['video_000'][:]
+            assert features.shape == (20, 2048)
+
+    def test_device_fixture(self, device):
+        """Test device fixture returns appropriate device."""
+        assert isinstance(device, torch.device)
+        assert device.type in ['cpu', 'cuda']
+
+    def test_random_seed_fixture(self):
+        """Test that random seeds are properly set."""
+        # Generate some random numbers
+        np_random1 = np.random.rand()
+        torch_random1 = torch.rand(1).item()
+
+        # Seeds should be reset by the fixture, so these should be deterministic
+        # across test runs (but different from each other within a test)
+        assert isinstance(np_random1, float)
+        assert isinstance(torch_random1, float)
+
+    def test_capture_stdout_fixture(self, capsys):
+        """Test stdout capture functionality."""
+        print("Test output")
+        captured = capsys.readouterr()
+        assert "Test output" in captured.out
\ No newline at end of file
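Past these infrastructure checks, a first real unit test would consume the conftest.py fixtures directly. A minimal sketch, assuming nothing about the actual PDVC model API (the mocked forward() simply returns the canned dict defined in the mock_model fixture):

import pytest


@pytest.mark.unit
def test_mock_forward_pass_shapes(mock_model, sample_video_features):
    # Shapes come straight from the sample_video_features fixture.
    features = sample_video_features['features']
    assert features.shape == (2, 10, 2048)

    # The mock ignores its inputs, so only the output structure is checked.
    out = mock_model.forward(features)
    assert out['loss'].item() == pytest.approx(1.0)
    assert out['predictions'].shape == (2, 10, 100)
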
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..e69de29
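The tests/unit/ and tests/integration/ packages are created empty here; anything placed in them is picked up automatically as long as file and function names match the python_files and python_functions patterns configured above. As a hypothetical seed for tests/integration/test_smoke.py:

import pytest


@pytest.mark.integration
def test_integration_scaffolding():
    # Placeholder until real end-to-end tests land; it keeps the integration
    # marker and directory wiring exercised in CI.
    assert True

`poetry run pytest -m integration` then selects only the tests carrying that marker, and --strict-markers rejects any marker name not declared in pyproject.toml.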