diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8118a66..4025084 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,3 +19,13 @@ jobs: pip install -e ".[dev]" - name: Run all tests run: pytest + + integration-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run integration test with docker compose + run: | + docker compose up --build --abort-on-container-exit --exit-code-from github-etl + - name: Cleanup + run: docker compose down -v diff --git a/Dockerfile b/Dockerfile index 5608295..bec1ed8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Use the latest stable Python image -FROM python:3.11-slim +FROM python:3.14.2-slim # Set environment variables ENV PYTHONDONTWRITEBYTECODE=1 \ @@ -34,4 +34,4 @@ RUN chown -R app:app /app USER app # Set the default command -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/Dockerfile.mock b/Dockerfile.mock index 1098382..cf46078 100644 --- a/Dockerfile.mock +++ b/Dockerfile.mock @@ -1,5 +1,5 @@ # Dockerfile for mock GitHub API service -FROM python:3.11-slim +FROM python:3.14.2-slim WORKDIR /app diff --git a/README.md b/README.md index 80a3afe..d27188b 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ docker run --rm \ ### Container Specifications -- **Base Image**: `python:3.11-slim` (latest stable Python) +- **Base Image**: `python:3.14.2-slim` (latest stable Python) - **User**: `app` (uid: 1000, gid: 1000) - **Working Directory**: `/app` - **Ownership**: All files in `/app` are owned by the `app` user @@ -157,6 +157,97 @@ This setup includes: - **BigQuery Emulator**: Local BigQuery instance for testing - **ETL Service**: Configured to use both mock services +### Running Tests + +The project includes a comprehensive test suite using pytest. Tests are organized in the `test/` directory and include both unit and integration tests. + +#### Setting Up the Development Environment + +1. **Install Python 3.14** (or your compatible Python version) + +2. **Install development dependencies**: + + ```bash + # Install the package with dev dependencies + pip install -e ".[dev]" + ``` + + This installs: + - `pytest` - Testing framework + - `pytest-mock` - Mocking utilities for tests + - `ruff` - Linter + - `black` - Code formatter + +3. **Verify installation**: + + ```bash + pytest --version + ``` + +#### Running the Tests + +Run all tests: + +```bash +pytest +``` + +Run tests with verbose output: + +```bash +pytest -v +``` + +Run specific test files: + +```bash +pytest tests/test_extract_pull_requests.py +pytest tests/test_transform_data.py +``` + +Run tests by marker: + +```bash +# Run only unit tests +pytest -m unit + +# Run only integration tests +pytest -m integration + +# Skip slow tests +pytest -m "not slow" +``` + +Run tests with coverage reporting: + +```bash +pytest --cov=. 
--cov-report=html +``` + +#### Test Organization + +The test suite is organized into the following files: + +- `tests/conftest.py` - Shared pytest fixtures and test configuration +- `tests/test_extract_pull_requests.py` - Tests for PR extraction logic +- `tests/test_extract_commits.py` - Tests for commit extraction +- `tests/test_extract_comments.py` - Tests for comment extraction +- `tests/test_extract_reviewers.py` - Tests for reviewer extraction +- `tests/test_transform_data.py` - Tests for data transformation +- `tests/test_load_data.py` - Tests for BigQuery loading +- `tests/test_rate_limit.py` - Tests for rate limit handling +- `tests/test_main_integration.py` - End-to-end integration tests +- `tests/test_logging.py` - Tests for logging setup +- `tests/test_formatting.py` - Code formatting tests + +#### Test Markers + +Tests are marked with the following pytest markers: + +- `@pytest.mark.unit` - Unit tests for individual functions +- `@pytest.mark.integration` - Integration tests across multiple components +- `@pytest.mark.slow` - Tests that take longer to run + ### Adding Dependencies Add new Python packages to `requirements.txt` and rebuild the Docker image. diff --git a/pyproject.toml b/pyproject.toml index f4aac49..ed3b2a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ [project.optional-dependencies] dev = [ "pytest>=7.0.0", + "pytest-mock>=3.10.0", "ruff>=0.14.14", "black>=24.0.0", ] diff --git a/requirements.txt b/requirements.txt index fd521f6..d487f50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.14 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --generate-hashes pyproject.toml diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0656e29 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,284 @@ +""" +Pytest fixtures for GitHub ETL tests. + +This module provides reusable test fixtures for mocking external dependencies +and providing sample data for unit and integration tests. +""" + +from datetime import datetime, timezone +from typing import Any +from unittest.mock import MagicMock, Mock + +import pytest +import requests +from google.cloud import bigquery + + +@pytest.fixture +def mock_env_vars(monkeypatch) -> dict[str, str]: + """ + Set up common environment variables for tests. + + Returns: + Dictionary of environment variables that were set + """ + env_vars = { + "GITHUB_TOKEN": "test_token_123", + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test-project", + "BIGQUERY_DATASET": "test_dataset", + } + for key, value in env_vars.items(): + monkeypatch.setenv(key, value) + return env_vars + + +@pytest.fixture +def sample_github_pr() -> dict[str, Any]: + """ + Sample GitHub pull request data from API response. 
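The marker-based test selection shown in the README section above only works cleanly if the `unit`, `integration`, and `slow` markers are registered with pytest; unregistered marks raise `PytestUnknownMarkWarning` (and fail outright under `--strict-markers`). Registration normally lives under `[tool.pytest.ini_options]` in `pyproject.toml`; that section is not part of this diff, so it is assumed to exist already. A purely illustrative example of how such markers are applied, not code taken from this repository:

```python
import pytest


@pytest.mark.unit
def test_bug_id_parsing_is_pure():
    # Unit tests exercise a single function with no network or BigQuery access.
    assert int("1234567") == 1234567


@pytest.mark.integration
@pytest.mark.slow
def test_end_to_end_placeholder():
    # Integration tests can carry several markers, so `pytest -m "not slow"`
    # skips them while `pytest -m integration` still selects them.
    assert True
```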
+ + Returns: + Dictionary representing a single PR from GitHub API + """ + return { + "number": 12345, + "state": "closed", + "title": "Bug 1234567 - Fix memory leak in parser", + "created_at": "2025-01-01T10:00:00Z", + "updated_at": "2025-01-02T15:30:00Z", + "merged_at": "2025-01-02T15:30:00Z", + "labels": [ + {"name": "bug"}, + {"name": "priority-high"}, + ], + "user": { + "login": "test_user", + "id": 123, + }, + "head": { + "ref": "feature-branch", + "sha": "abc123", + }, + "base": { + "ref": "main", + "sha": "def456", + }, + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + + +@pytest.fixture +def sample_github_commit() -> dict[str, Any]: + """ + Sample GitHub commit data from API response. + + Returns: + Dictionary representing a single commit from GitHub API + """ + return { + "sha": "abc123def456", + "commit": { + "author": { + "name": "Test Author", + "email": "author@example.com", + "date": "2025-01-01T10:00:00Z", + }, + "message": "Fix bug in parser", + }, + "files": [ + { + "filename": "src/parser.py", + "additions": 10, + "deletions": 5, + "changes": 15, + } + ], + } + + +@pytest.fixture +def sample_github_reviewer() -> dict[str, Any]: + """ + Sample GitHub review data from API response. + + Returns: + Dictionary representing a single review from GitHub API + """ + return { + "id": 98765, + "user": { + "login": "reviewer_user", + "id": 456, + }, + "state": "APPROVED", + "submitted_at": "2025-01-02T12:00:00Z", + "body": "LGTM", + } + + +@pytest.fixture +def sample_github_comment() -> dict[str, Any]: + """ + Sample GitHub comment data from API response. + + Returns: + Dictionary representing a single comment from GitHub API + """ + return { + "id": 111222, + "user": { + "login": "commenter_user", + "id": 789, + }, + "created_at": "2025-01-01T14:00:00Z", + "body": "Please check the edge case for null values", + "pull_request_review_id": None, + } + + +@pytest.fixture +def sample_transformed_data() -> dict[str, list[dict]]: + """ + Sample transformed data ready for BigQuery insertion. + + Returns: + Dictionary with keys for each table and transformed row data + """ + return { + "pull_requests": [ + { + "pull_request_id": 12345, + "current_status": "closed", + "date_created": "2025-01-01T10:00:00Z", + "date_modified": "2025-01-02T15:30:00Z", + "target_repository": "mozilla/firefox", + "bug_id": 1234567, + "date_landed": "2025-01-02T15:30:00Z", + "date_approved": "2025-01-02T12:00:00Z", + "labels": ["bug", "priority-high"], + } + ], + "commits": [ + { + "pull_request_id": 12345, + "target_repository": "mozilla/firefox", + "commit_sha": "abc123def456", + "date_created": "2025-01-01T10:00:00Z", + "author_username": "Test Author", + "author_email": None, + "filename": "src/parser.py", + "lines_removed": 5, + "lines_added": 10, + } + ], + "reviewers": [ + { + "pull_request_id": 12345, + "target_repository": "mozilla/firefox", + "date_reviewed": "2025-01-02T12:00:00Z", + "reviewer_email": None, + "reviewer_username": "reviewer_user", + "status": "APPROVED", + } + ], + "comments": [ + { + "pull_request_id": 12345, + "target_repository": "mozilla/firefox", + "comment_id": 111222, + "date_created": "2025-01-01T14:00:00Z", + "author_email": None, + "author_username": "commenter_user", + "character_count": 43, + "status": None, + } + ], + } + + +@pytest.fixture +def mock_session() -> Mock: + """ + Mock requests.Session with configurable responses. 
+ + Returns: + Mock session object with get() method + """ + session = Mock(spec=requests.Session) + session.headers = {} + return session + + +@pytest.fixture +def mock_github_response() -> Mock: + """ + Mock requests.Response for GitHub API calls. + + Returns: + Mock response with status_code, json(), headers, and links + """ + response = Mock(spec=requests.Response) + response.status_code = 200 + response.headers = { + "X-RateLimit-Remaining": "5000", + "X-RateLimit-Reset": "1609459200", + } + response.links = {} + response.text = "" + return response + + +@pytest.fixture +def mock_rate_limited_response() -> Mock: + """ + Mock requests.Response simulating rate limit exceeded. + + Returns: + Mock response with 403 status and rate limit headers + """ + response = Mock(spec=requests.Response) + response.status_code = 403 + response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": str(int(datetime.now(timezone.utc).timestamp()) + 3600), + } + response.text = "API rate limit exceeded" + return response + + +@pytest.fixture +def mock_bigquery_client() -> Mock: + """ + Mock BigQuery client for testing load operations. + + Returns: + Mock BigQuery client with insert_rows_json() method + """ + client = Mock(spec=bigquery.Client) + client.project = "test-project" + client.insert_rows_json = MagicMock(return_value=[]) # Empty list = no errors + return client + + +@pytest.fixture +def mock_bigquery_client_with_errors() -> Mock: + """ + Mock BigQuery client that returns insertion errors. + + Returns: + Mock BigQuery client that simulates insert failures + """ + client = Mock(spec=bigquery.Client) + client.project = "test-project" + client.insert_rows_json = MagicMock( + return_value=[ + { + "index": 0, + "errors": [{"reason": "invalid", "message": "Invalid schema"}], + } + ] + ) + return client diff --git a/tests/test_extract_comments.py b/tests/test_extract_comments.py new file mode 100644 index 0000000..25232b3 --- /dev/null +++ b/tests/test_extract_comments.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Tests for extract_comments function. + +Tests comment extraction including endpoint verification, rate limiting, +and error handling. 
+""" + +from unittest.mock import Mock, patch + +import pytest + +import main + + +def test_extract_comments_basic(mock_session): + """Test basic extraction of comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks good", + "created_at": "2024-01-01T14:00:00Z", + }, + { + "id": 457, + "user": {"login": "commenter2"}, + "body": "I have concerns", + "created_at": "2024-01-01T15:00:00Z", + }, + ] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["id"] == 456 + assert result[1]["id"] == 457 + + +def test_uses_issues_endpoint(mock_session): + """Test that comments use /issues endpoint not /pulls.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + main.extract_comments(mock_session, "mozilla/firefox", 123) + + call_args = mock_session.get.call_args + url = call_args[0][0] + assert "/issues/123/comments" in url + assert "/pulls/123/comments" not in url + + +def test_multiple_comments(mock_session): + """Test handling multiple comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"} + for i in range(1, 11) + ] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert len(result) == 10 + + +def test_empty_comments_list(mock_session): + """Test handling PR with no comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert result == [] + + +@patch("main.sleep_for_rate_limit") +def test_rate_limit_handling_comments(mock_sleep, mock_session): + """Test rate limit handling when fetching comments.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + +def test_api_error_comments(mock_session): + """Test API error handling when fetching comments.""" + error_response = Mock() + error_response.status_code = 404 + error_response.text = "Not Found" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 404" in str(exc_info.value) + + +def test_custom_github_api_url_comments(mock_session): + """Test using custom GitHub API URL for comments.""" + custom_url = "https://mock-github.example.com" + + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + main.extract_comments( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert 
custom_url in call_args[0][0] diff --git a/tests/test_extract_commits.py b/tests/test_extract_commits.py new file mode 100644 index 0000000..bccc8b5 --- /dev/null +++ b/tests/test_extract_commits.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +""" +Tests for extract_commits function. + +Tests commit extraction including file details, rate limiting, and error handling. +""" + +from unittest.mock import Mock, patch + +import pytest + +import main + + +def test_extract_commits_with_files(mock_session): + """Test extracting commits with file details.""" + # Mock commits list response + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [ + {"sha": "abc123"}, + {"sha": "def456"}, + ] + + # Mock individual commit responses + commit_detail_1 = Mock() + commit_detail_1.status_code = 200 + commit_detail_1.json.return_value = { + "sha": "abc123", + "files": [{"filename": "file1.py", "additions": 10}], + } + + commit_detail_2 = Mock() + commit_detail_2.status_code = 200 + commit_detail_2.json.return_value = { + "sha": "def456", + "files": [{"filename": "file2.py", "deletions": 5}], + } + + mock_session.get.side_effect = [ + commits_response, + commit_detail_1, + commit_detail_2, + ] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["sha"] == "abc123" + assert result[0]["files"][0]["filename"] == "file1.py" + assert result[1]["sha"] == "def456" + assert result[1]["files"][0]["filename"] == "file2.py" + + +def test_multiple_files_per_commit(mock_session): + """Test handling multiple files in a single commit.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] + + commit_detail = Mock() + commit_detail.status_code = 200 + commit_detail.json.return_value = { + "sha": "abc123", + "files": [ + {"filename": "file1.py", "additions": 10}, + {"filename": "file2.py", "additions": 20}, + {"filename": "file3.py", "deletions": 5}, + ], + } + + mock_session.get.side_effect = [commits_response, commit_detail] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert len(result) == 1 + assert len(result[0]["files"]) == 3 + + +@patch("main.sleep_for_rate_limit") +def test_rate_limit_on_commits_list(mock_sleep, mock_session): + """Test rate limit handling when fetching commits list.""" + # Rate limit response + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + # Success response + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + +def test_api_error_on_commits_list(mock_session): + """Test API error handling when fetching commits list.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 500" in str(exc_info.value) + + +def test_api_error_on_individual_commit(mock_session): + """Test API error when fetching individual commit details.""" + commits_response = Mock() + commits_response.status_code = 200 + 
commits_response.json.return_value = [{"sha": "abc123"}] + + commit_error = Mock() + commit_error.status_code = 404 + commit_error.text = "Commit not found" + + mock_session.get.side_effect = [commits_response, commit_error] + + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 404" in str(exc_info.value) + + +def test_commit_without_sha_field(mock_session): + """Test handling commits without sha field.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [ + {"sha": "abc123"}, + {}, # Missing sha field + ] + + commit_detail_1 = Mock() + commit_detail_1.status_code = 200 + commit_detail_1.json.return_value = {"sha": "abc123", "files": []} + + commit_detail_2 = Mock() + commit_detail_2.status_code = 200 + commit_detail_2.json.return_value = {"files": []} + + mock_session.get.side_effect = [ + commits_response, + commit_detail_1, + commit_detail_2, + ] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + # Should handle the commit without sha gracefully + assert len(result) == 2 + + +def test_custom_github_api_url_commits(mock_session): + """Test using custom GitHub API URL for commits.""" + custom_url = "https://mock-github.example.com" + + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [] + + mock_session.get.return_value = commits_response + + main.extract_commits( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + +def test_empty_commits_list(mock_session): + """Test handling PR with no commits.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [] + + mock_session.get.return_value = commits_response + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert result == [] diff --git a/tests/test_extract_pull_requests.py b/tests/test_extract_pull_requests.py new file mode 100644 index 0000000..b6325fb --- /dev/null +++ b/tests/test_extract_pull_requests.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +""" +Tests for extract_pull_requests function. + +Tests pull request extraction including pagination, rate limiting, error handling, +and enrichment with commits, reviewers, and comments. 
+""" + +from unittest.mock import Mock, patch + +import pytest + +import main + + +def test_extract_pull_requests_basic(mock_session): + """Test basic extraction of pull requests.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + # Mock the extract functions + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert len(result) == 1 + assert len(result[0]) == 2 + assert result[0][0]["number"] == 1 + assert result[0][1]["number"] == 2 + + +def test_extract_multiple_pages(mock_session): + """Test extracting data across multiple pages with pagination.""" + # First page response + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } + + # Second page response + mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] + mock_response_2.links = {} + + mock_session.get.side_effect = [mock_response_1, mock_response_2] + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert len(result) == 2 + assert len(result[0]) == 2 + assert len(result[1]) == 1 + assert result[0][0]["number"] == 1 + assert result[1][0]["number"] == 3 + + +def test_enriches_prs_with_commit_data(mock_session): + """Test that PRs are enriched with commit data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_commits = [{"sha": "abc123"}] + + with ( + patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert result[0][0]["commit_data"] == mock_commits + mock_extract_commits.assert_called_once() + + +def test_enriches_prs_with_reviewer_data(mock_session): + """Test that PRs are enriched with reviewer data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_reviewers = [{"id": 789, "state": "APPROVED"}] + + with ( + patch("main.extract_commits", return_value=[]), + patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert result[0][0]["reviewer_data"] == mock_reviewers + mock_extract_reviewers.assert_called_once() + + +def test_enriches_prs_with_comment_data(mock_session): + """Test that PRs are enriched with comment 
data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_comments = [{"id": 456, "body": "Great work!"}] + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments, + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert result[0][0]["comment_data"] == mock_comments + mock_extract_comments.assert_called_once() + + +@patch("main.sleep_for_rate_limit") +def test_handles_rate_limit(mock_sleep, mock_session): + """Test that extract_pull_requests handles rate limiting correctly.""" + # Rate limit response + mock_response_rate_limit = Mock() + mock_response_rate_limit.status_code = 403 + mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} + + # Successful response after rate limit + mock_response_success = Mock() + mock_response_success.status_code = 200 + mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response_success.links = {} + + mock_session.get.side_effect = [ + mock_response_rate_limit, + mock_response_success, + ] + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + mock_sleep.assert_called_once_with(mock_response_rate_limit) + assert len(result) == 1 + + +def test_handles_api_error_404(mock_session): + """Test that extract_pull_requests raises SystemExit on 404.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not Found" + + mock_session.get.return_value = mock_response + + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) + + assert "GitHub API error 404" in str(exc_info.value) + + +def test_handles_api_error_500(mock_session): + """Test that extract_pull_requests raises SystemExit on 500.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + + mock_session.get.return_value = mock_response + + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert "GitHub API error 500" in str(exc_info.value) + + +def test_stops_on_empty_batch(mock_session): + """Test that extraction stops when an empty batch is returned.""" + # First page with data + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } + + # Second page empty + mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [] + mock_response_2.links = {} + + mock_session.get.side_effect = [mock_response_1, mock_response_2] + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + # Should only have 1 chunk from first page + assert len(result) == 1 + assert len(result[0]) == 1 + + +def test_invalid_page_number_handling(mock_session): + 
"""Test handling of invalid page number in pagination.""" + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" + } + } + + mock_session.get.return_value = mock_response_1 + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + # Should stop pagination on invalid page number + assert len(result) == 1 + + +def test_custom_github_api_url(mock_session): + """Test using custom GitHub API URL.""" + custom_url = "https://mock-github.example.com" + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + list( + main.extract_pull_requests( + mock_session, "mozilla/firefox", github_api_url=custom_url + ) + ) + + # Verify custom URL was used + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + +def test_skips_prs_without_number_field(mock_session): + """Test that PRs without 'number' field are skipped.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"title": "PR without number"}, # Missing number field + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + with ( + patch("main.extract_commits", return_value=[]) as mock_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + # extract_commits should only be called for PRs with number field + assert mock_commits.call_count == 2 diff --git a/tests/test_extract_reviewers.py b/tests/test_extract_reviewers.py new file mode 100644 index 0000000..7df4b43 --- /dev/null +++ b/tests/test_extract_reviewers.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +""" +Tests for extract_reviewers function. + +Tests reviewer extraction including different review states, rate limiting, +and error handling. 
+""" + +from unittest.mock import Mock, patch + +import pytest + +import main + + +def test_extract_reviewers_basic(mock_session): + """Test basic extraction of reviewers.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 790, + "user": {"login": "reviewer2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + ] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["state"] == "APPROVED" + assert result[1]["state"] == "CHANGES_REQUESTED" + + +def test_multiple_review_states(mock_session): + """Test handling multiple different review states.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + {"id": 1, "state": "APPROVED", "user": {"login": "user1"}}, + {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}}, + {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}}, + {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}}, + ] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert len(result) == 4 + states = [r["state"] for r in result] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + + +def test_empty_reviewers_list(mock_session): + """Test handling PR with no reviewers.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert result == [] + + +@patch("main.sleep_for_rate_limit") +def test_rate_limit_handling(mock_sleep, mock_session): + """Test rate limit handling when fetching reviewers.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + +def test_api_error(mock_session): + """Test API error handling when fetching reviewers.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 500" in str(exc_info.value) + + +def test_custom_github_api_url_reviewers(mock_session): + """Test using custom GitHub API URL for reviewers.""" + custom_url = "https://mock-github.example.com" + + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] + + mock_session.get.return_value = reviewers_response + + main.extract_reviewers( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] diff --git a/tests/test_load_data.py b/tests/test_load_data.py new file mode 
100644 index 0000000..0203288 --- /dev/null +++ b/tests/test_load_data.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +""" +Tests for load_data function. + +Tests BigQuery data loading including table insertion, snapshot dates, +and error handling. +""" + +from unittest.mock import patch + +import pytest + +import main + + +@patch("main.datetime") +def test_load_data_inserts_all_tables(mock_datetime, mock_bigquery_client): + """Test that load_data inserts all tables correctly.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [{"commit_sha": "abc"}], + "reviewers": [{"reviewer_username": "user1"}], + "comments": [{"comment_id": 123}], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + # Should call insert_rows_json 4 times (once per table) + assert mock_bigquery_client.insert_rows_json.call_count == 4 + + +@patch("main.datetime") +def test_adds_snapshot_date(mock_datetime, mock_bigquery_client): + """Test that snapshot_date is added to all rows.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + rows = call_args[0][1] + assert all(row["snapshot_date"] == "2024-01-15" for row in rows) + + +def test_constructs_correct_table_ref(mock_bigquery_client): + """Test that table_ref is constructed correctly.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "my_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref = call_args[0][0] + assert table_ref == "test-project.my_dataset.pull_requests" + + +def test_empty_transformed_data_skipped(mock_bigquery_client): + """Test that empty transformed_data dict is skipped.""" + transformed_data = {} + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + mock_bigquery_client.insert_rows_json.assert_not_called() + + +def test_skips_empty_tables_individually(mock_bigquery_client): + """Test that empty tables are skipped individually.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], # Empty, should be skipped + "reviewers": [], # Empty, should be skipped + "comments": [{"comment_id": 456}], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + # Should only call insert_rows_json twice (for PRs and comments) + assert mock_bigquery_client.insert_rows_json.call_count == 2 + + +def test_only_pull_requests_table(mock_bigquery_client): + """Test loading only pull_requests table.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + assert mock_bigquery_client.insert_rows_json.call_count == 1 + + +def test_raises_exception_on_insert_errors(mock_bigquery_client): + """Test that Exception is raised on BigQuery insert errors.""" + mock_bigquery_client.insert_rows_json.return_value = [ + {"index": 0, "errors": ["Insert failed"]} + ] + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + 
"reviewers": [], + "comments": [], + } + + with pytest.raises(Exception) as exc_info: + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + assert "BigQuery insert errors" in str(exc_info.value) + + +def test_verifies_client_insert_called_correctly(mock_bigquery_client): + """Test that client.insert_rows_json is called with correct arguments.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref, rows = call_args[0] + + assert "pull_requests" in table_ref + assert len(rows) == 2 diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 0000000..10730d1 --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +""" +Tests for setup_logging function. + +Tests logging configuration including log level and handler setup. +""" + +import logging + +import main + + +def test_setup_logging(): + """Test that setup_logging configures logging correctly.""" + main.setup_logging() + + root_logger = logging.getLogger() + assert root_logger.level == logging.INFO + assert len(root_logger.handlers) > 0 + + # Check that at least one handler is a StreamHandler + has_stream_handler = any( + isinstance(handler, logging.StreamHandler) for handler in root_logger.handlers + ) + assert has_stream_handler diff --git a/tests/test_main_integration.py b/tests/test_main_integration.py new file mode 100644 index 0000000..e09d940 --- /dev/null +++ b/tests/test_main_integration.py @@ -0,0 +1,544 @@ +#!/usr/bin/env python3 +""" +Tests for main function and full ETL integration. + +Tests main orchestration including environment variables, session setup, +repository processing, chunked ETL flow, and end-to-end integration tests. 
+""" + +import os +from unittest.mock import MagicMock, Mock, patch + +import pytest + +import main + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_github_repos(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_REPOS is required.""" + with patch.dict( + os.environ, + {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, + clear=True, + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "GITHUB_REPOS" in str(exc_info.value) + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_bigquery_project( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that BIGQUERY_PROJECT is required.""" + with patch.dict( + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, + clear=True, + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "BIGQUERY_PROJECT" in str(exc_info.value) + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_bigquery_dataset( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that BIGQUERY_DATASET is required.""" + with patch.dict( + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, + clear=True, + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "BIGQUERY_DATASET" in str(exc_info.value) + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_github_token_optional_with_warning( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that GITHUB_TOKEN is optional but warns if missing.""" + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + # Should not raise, but should log warning + result = main.main() + assert result == 0 + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_splits_github_repos_by_comma( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that GITHUB_REPOS is split by comma.""" + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, + ): + main.main() + + # Should be called twice (once per repo) + assert mock_extract.call_count == 2 + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_honors_github_api_url(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_API_URL is honored.""" + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, + ): + main.main() + + call_kwargs = mock_extract.call_args[1] + assert call_kwargs["github_api_url"] == "https://custom-api.example.com" + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_honors_bigquery_emulator_host( + mock_session_class, 
mock_bq_client_class, mock_setup_logging +): + """Test that BIGQUERY_EMULATOR_HOST is honored.""" + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify BigQuery client was created with emulator settings + mock_bq_client_class.assert_called_once() + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_creates_session_with_headers( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that session is created with Accept and User-Agent headers.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify session headers were set + assert mock_session.headers.update.called + call_args = mock_session.headers.update.call_args[0][0] + assert "Accept" in call_args + assert "User-Agent" in call_args + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_sets_authorization_header_with_token( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that Authorization header is set when token provided.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify Authorization header was set + assert mock_session.headers.__setitem__.called + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_single_repo_successful_etl( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test successful ETL for single repository.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_extract.assert_called_once() + mock_transform.assert_called_once() + mock_load.assert_called_once() + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_multiple_repos_processing( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test processing multiple repositories.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + 
"comments": [], + } + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Should process 3 repositories + assert mock_extract.call_count == 3 + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_processes_chunks_iteratively( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test that chunks are processed iteratively from generator.""" + # Return 3 chunks + mock_extract.return_value = iter( + [ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], + ] + ) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Transform and load should be called 3 times (once per chunk) + assert mock_transform.call_count == 3 + assert mock_load.call_count == 3 + + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_returns_zero_on_success( + mock_session_class, mock_bq_client, mock_setup_logging +): + """Test that main returns 0 on success.""" + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + result = main.main() + + assert result == 0 + + +@pytest.mark.integration +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_full_etl_flow_transforms_data_correctly( + mock_session_class, mock_bq_client, mock_load, mock_setup_logging +): + """Test full ETL flow with mocked GitHub responses.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # Mock PR response + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} + ] + pr_response.links = {} + + # Mock commits, reviewers, comments responses + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_load.assert_called_once() + + # Verify transformed data structure + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + assert "pull_requests" in transformed_data + assert len(transformed_data["pull_requests"]) == 1 + + +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_bug_id_extraction_through_pipeline( + mock_session_class, mock_bq_client, mock_load, mock_setup_logging +): + """Test bug ID extraction 
through full pipeline.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + { + "number": 1, + "title": "Bug 9876543 - Fix critical issue", + "state": "closed", + } + ] + pr_response.links = {} + + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + main.main() + + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + pr = transformed_data["pull_requests"][0] + assert pr["bug_id"] == 9876543 + + +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_pagination_through_full_flow( + mock_session_class, mock_bq_client, mock_load, mock_setup_logging +): + """Test pagination through full ETL flow.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # First page + pr_response_1 = Mock() + pr_response_1.status_code = 200 + pr_response_1.json.return_value = [{"number": 1, "title": "PR 1", "state": "open"}] + pr_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } + + # Second page + pr_response_2 = Mock() + pr_response_2.status_code = 200 + pr_response_2.json.return_value = [{"number": 2, "title": "PR 2", "state": "open"}] + pr_response_2.links = {} + + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response_1, + empty_response, + empty_response, + empty_response, + pr_response_2, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + main.main() + + # Should be called twice (once per chunk/page) + assert mock_load.call_count == 2 diff --git a/tests/test_rate_limit.py b/tests/test_rate_limit.py new file mode 100644 index 0000000..9d32961 --- /dev/null +++ b/tests/test_rate_limit.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +""" +Tests for sleep_for_rate_limit function. + +Tests rate limit handling including wait time calculation and edge cases. 
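The orchestration tests above describe how `main.main` is expected to behave: required environment variables (`GITHUB_REPOS`, `BIGQUERY_PROJECT`, `BIGQUERY_DATASET`), an optional `GITHUB_TOKEN`, comma-separated repositories, and a transform-and-load call for every extracted chunk. A condensed sketch of that flow, with header setup, emulator handling, logging, and error reporting simplified:

```python
import os
import sys

import requests
from google.cloud import bigquery

from main import extract_pull_requests, load_data, transform_data  # existing ETL steps


def main_sketch():
    repos = os.environ.get("GITHUB_REPOS")
    project = os.environ.get("BIGQUERY_PROJECT")
    dataset = os.environ.get("BIGQUERY_DATASET")
    if not repos:
        sys.exit("GITHUB_REPOS environment variable is required")
    if not project:
        sys.exit("BIGQUERY_PROJECT environment variable is required")
    if not dataset:
        sys.exit("BIGQUERY_DATASET environment variable is required")

    session = requests.Session()  # real code also sets Accept and User-Agent headers
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        # The exact auth scheme ("token ..." vs "Bearer ...") is not pinned by the tests.
        session.headers["Authorization"] = f"Bearer {token}"

    client = bigquery.Client(project=project)
    api_url = os.environ.get("GITHUB_API_URL", "https://api.github.com")

    for repo in repos.split(","):
        repo = repo.strip()
        for chunk in extract_pull_requests(session, repo, github_api_url=api_url):
            load_data(client, dataset, transform_data(chunk, repo))
    return 0
```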
+""" + +from unittest.mock import Mock, patch + +import main + + +@patch("time.time") +@patch("time.sleep") +def test_sleep_for_rate_limit_calculates_wait_time(mock_sleep, mock_time): + """Test that sleep_for_rate_limit calculates correct wait time.""" + mock_time.return_value = 1000 + + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1120", # 120 seconds from now + } + + main.sleep_for_rate_limit(mock_response) + + mock_sleep.assert_called_once_with(120) + + +@patch("time.time") +@patch("time.sleep") +def test_sleep_for_rate_limit_when_reset_already_passed(mock_sleep, mock_time): + """Test that sleep_for_rate_limit doesn't sleep negative time.""" + mock_time.return_value = 2000 + + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1500", # Already passed + } + + main.sleep_for_rate_limit(mock_response) + + # Should sleep for 0 seconds (max of 0 and negative value) + mock_sleep.assert_called_once_with(0) + + +@patch("time.sleep") +def test_sleep_for_rate_limit_when_remaining_not_zero(mock_sleep): + """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "5", + "X-RateLimit-Reset": "1500", + } + + main.sleep_for_rate_limit(mock_response) + + # Should not sleep when remaining > 0 + mock_sleep.assert_not_called() + + +@patch("time.sleep") +def test_sleep_for_rate_limit_with_missing_headers(mock_sleep): + """Test sleep_for_rate_limit with missing rate limit headers.""" + mock_response = Mock() + mock_response.headers = {} + + main.sleep_for_rate_limit(mock_response) + + # Should not sleep when headers are missing (defaults to remaining=1) + mock_sleep.assert_not_called() diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py new file mode 100644 index 0000000..2b8353b --- /dev/null +++ b/tests/test_transform_data.py @@ -0,0 +1,625 @@ +#!/usr/bin/env python3 +""" +Tests for transform_data function. + +Tests data transformation including bug ID extraction, label processing, +commit/reviewer/comment flattening, and field mapping. 
+""" + +import main + + +def test_transform_data_basic(): + """Test basic transformation of pull request data.""" + raw_data = [ + { + "number": 123, + "title": "Fix login bug", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T12:00:00Z", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + pr = result["pull_requests"][0] + assert pr["pull_request_id"] == 123 + assert pr["current_status"] == "closed" + assert pr["date_created"] == "2024-01-01T10:00:00Z" + assert pr["date_modified"] == "2024-01-02T10:00:00Z" + assert pr["date_landed"] == "2024-01-02T12:00:00Z" + assert pr["target_repository"] == "mozilla/firefox" + + +def test_bug_id_extraction_basic(): + """Test bug ID extraction from PR title.""" + test_cases = [ + ("Bug 1234567 - Fix issue", 1234567), + ("bug 1234567: Update code", 1234567), + ("Fix for bug 7654321", 7654321), + ("b=9876543 - Change behavior", 9876543), + ] + + for title, expected_bug_id in test_cases: + raw_data = [ + { + "number": 1, + "title": title, + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == expected_bug_id + + +def test_bug_id_extraction_with_hash(): + """Test bug ID extraction with # symbol.""" + raw_data = [ + { + "number": 1, + "title": "Bug #1234567 - Fix issue", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == 1234567 + + +def test_bug_id_filter_large_numbers(): + """Test that bug IDs >= 100000000 are filtered out.""" + raw_data = [ + { + "number": 1, + "title": "Bug 999999999 - Invalid bug ID", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + + +def test_bug_id_no_match(): + """Test PR title with no bug ID.""" + raw_data = [ + { + "number": 1, + "title": "Update documentation", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + + +def test_labels_extraction(): + """Test labels array extraction.""" + raw_data = [ + { + "number": 1, + "title": "PR with labels", + "state": "open", + "labels": [ + {"name": "bug"}, + {"name": "priority-high"}, + {"name": "needs-review"}, + ], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + labels = result["pull_requests"][0]["labels"] + assert len(labels) == 3 + assert "bug" in labels + assert "priority-high" in labels + assert "needs-review" in labels + + +def test_labels_empty_list(): + """Test handling empty labels list.""" + raw_data = [ + { + "number": 1, + "title": "PR without labels", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["labels"] == [] + + +def 
test_commit_transformation(): + """Test commit fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": { + "author": { + "name": "Test Author", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/main.py", + "additions": 10, + "deletions": 5, + } + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["commits"]) == 1 + commit = result["commits"][0] + assert commit["pull_request_id"] == 123 + assert commit["target_repository"] == "mozilla/firefox" + assert commit["commit_sha"] == "abc123" + assert commit["date_created"] == "2024-01-01T12:00:00Z" + assert commit["author_username"] == "Test Author" + assert commit["filename"] == "src/main.py" + assert commit["lines_added"] == 10 + assert commit["lines_removed"] == 5 + + +def test_commit_file_flattening(): + """Test that each file becomes a separate row.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple files", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 5}, + {"filename": "file2.py", "additions": 20, "deletions": 2}, + {"filename": "file3.py", "additions": 5, "deletions": 15}, + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows in commits table (one per file) + assert len(result["commits"]) == 3 + filenames = [c["filename"] for c in result["commits"]] + assert "file1.py" in filenames + assert "file2.py" in filenames + assert "file3.py" in filenames + + +def test_multiple_commits_with_files(): + """Test multiple commits with multiple files per PR.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "commit1", + "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 0} + ], + }, + { + "sha": "commit2", + "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, + "files": [ + {"filename": "file2.py", "additions": 5, "deletions": 2}, + {"filename": "file3.py", "additions": 8, "deletions": 3}, + ], + }, + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows total (1 file from commit1, 2 files from commit2) + assert len(result["commits"]) == 3 + assert result["commits"][0]["commit_sha"] == "commit1" + assert result["commits"][1]["commit_sha"] == "commit2" + assert result["commits"][2]["commit_sha"] == "commit2" + + +def test_reviewer_transformation(): + """Test reviewer fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with reviewers", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + } + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 1 + reviewer = result["reviewers"][0] + assert reviewer["pull_request_id"] == 123 + assert reviewer["target_repository"] == "mozilla/firefox" + assert reviewer["reviewer_username"] == 
"reviewer1" + assert reviewer["status"] == "APPROVED" + assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + + +def test_transform_multiple_review_states(): + """Test transforming data with multiple review states.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple reviews", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "COMMENTED", + "submitted_at": "2024-01-01T17:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 3 + states = [r["status"] for r in result["reviewers"]] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + + +def test_date_approved_from_earliest_approval(): + """Test that date_approved is set to earliest APPROVED review.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple approvals", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-02T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T14:00:00Z", # Earliest + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "APPROVED", + "submitted_at": "2024-01-03T16:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + pr = result["pull_requests"][0] + assert pr["date_approved"] == "2024-01-01T14:00:00Z" + + +def test_comment_transformation(): + """Test comment fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with comments", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks great!", + "created_at": "2024-01-01T14:00:00Z", + "pull_request_review_id": None, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["comments"]) == 1 + comment = result["comments"][0] + assert comment["pull_request_id"] == 123 + assert comment["target_repository"] == "mozilla/firefox" + assert comment["comment_id"] == 456 + assert comment["author_username"] == "commenter1" + assert comment["date_created"] == "2024-01-01T14:00:00Z" + assert comment["character_count"] == 17 + + +def test_comment_character_count(): + """Test character count calculation for comments.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": "Short", + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "This is a much longer comment with more text", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 5 + assert result["comments"][1]["character_count"] == 44 + + +def test_comment_status_from_review(): + """Test that comment status is mapped from review_id_statuses.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + 
"labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter"}, + "body": "LGTM", + "created_at": "2024-01-01", + "pull_request_review_id": 789, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Comment should have status from the review + assert result["comments"][0]["status"] == "APPROVED" + + +def test_comment_empty_body(): + """Test handling comments with empty or None body.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": None, + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 0 + assert result["comments"][1]["character_count"] == 0 + + +def test_empty_raw_data(): + """Test handling empty input list.""" + result = main.transform_data([], "mozilla/firefox") + + assert result["pull_requests"] == [] + assert result["commits"] == [] + assert result["reviewers"] == [] + assert result["comments"] == [] + + +def test_pr_without_commits_reviewers_comments(): + """Test PR with no commits, reviewers, or comments.""" + raw_data = [ + { + "number": 123, + "title": "Minimal PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + assert len(result["commits"]) == 0 + assert len(result["reviewers"]) == 0 + assert len(result["comments"]) == 0 + + +def test_return_structure(): + """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 1, + "title": "Test", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert isinstance(result, dict) + assert "pull_requests" in result + assert "commits" in result + assert "reviewers" in result + assert "comments" in result + + +def test_all_tables_have_target_repository(): + """Test that all tables include target_repository field.""" + raw_data = [ + { + "number": 123, + "title": "Test PR", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [{"filename": "test.py", "additions": 1, "deletions": 0}], + } + ], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 2, + "user": {"login": "commenter"}, + "body": "Test", + "created_at": "2024-01-01", + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" + assert result["commits"][0]["target_repository"] == "mozilla/firefox" + assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" + assert result["comments"][0]["target_repository"] == "mozilla/firefox"