diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 1f43151..6414038 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -11,24 +11,33 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.11', '3.12', '3.13'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - name: Checkout uses: actions/checkout@v5 + with: + fetch-depth: 0 # Need full history for git tests - name: Setup Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install package and dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest + pip install -e . + pip install pytest pytest-cov + + - name: Run unit tests + run: | + pytest tests/ -v --cov=contrib_checker --cov-report=xml --cov-report=term - - name: Run tests - run: pytest -q - env: - PYTHONPATH: ${{ github.workspace }} + - name: Upload coverage to Codecov + if: matrix.python-version == '3.11' + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests + name: codecov-umbrella diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..dc053d2 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,14 @@ +include README.md +include LICENSE +include CITATION.cff +include CONTRIBUTING.md +include requirements.txt +include action.yml +include .gitlab-ci.yml +include GITLAB_CI_USAGE.md +include contrib_checker_core.py +include check_contributors_github.py +include check_contributors_gitlab.py +recursive-include tests *.py +global-exclude __pycache__ +global-exclude *.py[co] diff --git a/README.md b/README.md index ecd51e3..e8d4278 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,110 @@ -# ContribChecker +# contrib-checker -ContribChecker is a GitHub Action and helper script that verifies that contributors who appear in the git history are listed in the repository metadata files (`CITATION.cff` and 
`codemeta.json`). It uses a `.mailmap` file to unify multiple emails/names for the same person. +contrib-checker is a library and set of tools that verify contributors from git history are properly listed in repository metadata files (`CITATION.cff` and `codemeta.json`). +It provides: -Why this is useful -- Keeps citation and credit metadata accurate when new contributors add commits -- Helps projects maintain reproducible credit and citation information +- **Python library**: Installable package for programmatic use +- **Command-line tool**: `contrib-checker` CLI for local checking +- **GitHub Action**: Automated checking in GitHub workflows +- **GitLab CI**: Support for GitLab merge request checking -What this repository provides -- A Python script at `check_contributors.py` that performs the check -- A GitHub Actions bot at `action.yml` that runs the script on PR events +## Installation -How it works -- The action runs on PR events. It runs `git log --use-mailmap --format='%aN <%aE>' BASE..HEAD` to collect commit authors, so ensure `.mailmap` is present if you need to unify multiple emails. -- It compares commit authors against `CITATION.cff` and `codemeta.json` and posts a comment if missing contributors are found. -- If `mode: fail` is set in the config, the Action will fail the job (exit code 1). +### As a Python package +```bash +pip install contrib-checker +``` + +### For development + +```bash +git clone https://github.com/vuillaut/contrib-checker.git +cd contrib-checker +pip install -e . 
+ +``` + +## Usage + +### Command-line tool + +After installation, you can use the `contrib-checker` command: + +```bash +# Check all contributors in current repository +contrib-checker + +# Check with specific mode +contrib-checker --mode fail + +# Check with ignore lists +contrib-checker --ignore-emails bot@example.com --ignore-logins bot-user + +# Check specific commit range +contrib-checker --from-sha abc123 --to-sha def456 + +# Use specific repository path +contrib-checker --repo-path /path/to/repo + +# See all options +contrib-checker --help +``` + +### As a Python library + +```python +from contrib_checker import ContributorChecker +from pathlib import Path + +# Initialize checker +config = { + 'mode': 'warn', # or 'fail' + 'ignore_emails': ['bot@example.com'], + 'ignore_logins': ['bot-user'] +} + +checker = ContributorChecker( + repo_path=Path('.'), + config=config +) + +# Check all contributors +success, results = checker.check_all_contributors() + +# Check specific commit range +success, results = checker.check_range_contributors( + base_sha='abc123', + head_sha='def456', + context_name='PR commits' +) + +# Check results +if results['missing_overall']: + print("Missing contributors:") + for contributor in results['missing_overall']: + print(f" {contributor}") +``` + +### Platform-specific usage + +```python +# GitHub-specific wrapper +from contrib_checker import GitHubContributorChecker -## Quick start +github_checker = GitHubContributorChecker() +success = github_checker.check_pr_contributors() + +# GitLab-specific wrapper +from contrib_checker import GitLabContributorChecker + +gitlab_checker = GitLabContributorChecker() +success = gitlab_checker.check_mr_contributors() +``` + +## GitHub Action Setup + +### Quick start 1. Ensure your repository has `CITATION.cff` and/or `codemeta.json` with author/contributor entries. 2. Add a `.mailmap` at the repository root if you need to unify alternate emails or names from the git history. 
@@ -26,6 +114,16 @@ How it works ### Example `.github/workflows/contrib-check.yml` ```yaml +name: Contributor Check + +on: + pull_request: + types: [opened, synchronize] + +permissions: + contents: read + pull-requests: write # allows posting comments on PRs + jobs: contrib-check: runs-on: ubuntu-latest @@ -36,12 +134,60 @@ jobs: with: github_token: ${{ secrets.GITHUB_TOKEN }} mode: warn # or 'fail' to make the workflow fail when contributors are missing - ignore_emails: "dependabot[bot]@users.noreply.github.com,ci-bot@example.com,noreply@github.com" - ignore_logins: "dependabot[bot],github-actions[bot],ci-bot" + ignore_emails: "dependabot[bot]@users.noreply.github.com,ci-bot@example.com" + ignore_logins: "dependabot[bot],github-actions[bot]" ``` +## GitLab CI Setup + +See [GITLAB_CI_USAGE.md](GITLAB_CI_USAGE.md) for detailed GitLab CI setup instructions. + +### Example `.gitlab-ci.yml` + +```yaml +contrib-check: + stage: test + image: python:3.11 + script: + - pip install contrib-checker + - contrib-checker + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" +``` + +## How it works + +- Uses `git log --use-mailmap` to collect commit authors, so ensure `.mailmap` is present if you need to unify multiple emails +- Compares commit authors against `CITATION.cff` and `codemeta.json` contributors +- Posts comments on GitHub PRs or GitLab MRs when missing contributors are found +- Can be configured to fail CI when contributors are missing (`mode: fail`) + ## Requirements -- GitHub Actions must be enabled for your repository. -- A `CITATION.cff` or `codemeta.json` file must be present and properly formatted. -- Optional: A `.mailmap` file if you need to unify contributor names/emails from git history. 
\ No newline at end of file +- Python 3.8+ +- Git repository with contributor metadata files +- For GitHub Actions: `CITATION.cff` or `codemeta.json` file +- For GitLab CI: Same metadata files plus GitLab API token +- Optional: `.mailmap` file to unify contributor names/emails + +## Configuration + +The tool can be configured via: + +1. **Configuration file**: `.github/contrib-metadata-check.yml` (GitHub) or environment variables (GitLab) +2. **Command-line arguments**: When using the CLI +3. **Environment variables**: For CI/CD integration + +### Configuration options + +- `mode`: `warn` (default) or `fail` +- `ignore_emails`: List of email addresses to ignore +- `ignore_logins`: List of login names to ignore + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines. + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/action.yml b/action.yml index 4c00928..de4692d 100644 --- a/action.yml +++ b/action.yml @@ -30,12 +30,12 @@ runs: with: python-version: '3.x' - - name: Install deps - run: python -m pip install -r ${{ github.action_path }}/requirements.txt + - name: Install contrib-checker package + run: python -m pip install ${{ github.action_path }} shell: bash - - name: Run check script - run: python ${{ github.action_path }}/check_contributors.py + - name: Run contributor check + run: python -m contrib_checker.github shell: bash env: GITHUB_TOKEN: ${{ inputs.github_token }} diff --git a/check_contributors.py b/check_contributors.py deleted file mode 100644 index 7ceeafe..0000000 --- a/check_contributors.py +++ /dev/null @@ -1,341 +0,0 @@ - -#!/usr/bin/env python3 -"""Contributor checker script used by the workflow and by local tests. 
- -This is a compact, dependency-light implementation that: -- collects contributors with `git log --use-mailmap` (PR range or all) -- parses `CITATION.cff` and `codemeta.json` -- normalizes names and compares -- posts a PR comment when credentials are present (requires `requests`) -""" - -import os -import sys -import subprocess -import yaml -import json -import re -from pathlib import Path -from typing import Set - -try: - import requests -except Exception: - requests = None - - -class ContributorChecker: - def __init__(self) -> None: - self.github_token = os.environ.get('GITHUB_TOKEN') - self.github_repo = os.environ.get('GITHUB_REPOSITORY') - self.pr_number = os.environ.get('PR_NUMBER') - self.pr_base_sha = os.environ.get('PR_BASE_SHA') - self.pr_head_sha = os.environ.get('PR_HEAD_SHA') - self.repo_root = Path('.') - self.config = self._load_config() - - def _load_config(self): - cfg = self.repo_root / '.github' / 'contrib-metadata-check.yml' - default = { - 'mode': 'warn', - 'ignore_emails': ['dependabot[bot]@users.noreply.github.com'], - 'ignore_logins': ['dependabot[bot]'] - } - if cfg.exists(): - try: - with open(cfg, 'r', encoding='utf-8') as f: - data = yaml.safe_load(f) or {} - default.update(data) - except Exception as e: - print(f"Warning loading config: {e}") - - # Override with action inputs if provided - action_mode = os.environ.get('ACTION_MODE') - if action_mode: - default['mode'] = action_mode - - action_ignore_emails = os.environ.get('ACTION_IGNORE_EMAILS', '').strip() - if action_ignore_emails: - emails = [email.strip() for email in action_ignore_emails.split(',') if email.strip()] - default['ignore_emails'] = emails - - action_ignore_logins = os.environ.get('ACTION_IGNORE_LOGINS', '').strip() - if action_ignore_logins: - logins = [login.strip() for login in action_ignore_logins.split(',') if login.strip()] - default['ignore_logins'] = logins - - return default - - def _run_git(self, args: list) -> str: - try: - res = subprocess.run(['git'] + 
args, cwd=self.repo_root, capture_output=True, text=True, check=True) - return res.stdout - except subprocess.CalledProcessError as e: - print(f"git failed: {' '.join(e.cmd)} -> {e.stderr}") - return '' - - def should_include_contributor(self, contributor: str) -> bool: - m = re.search(r'<([^>]+)>', contributor) - if m and m.group(1) in self.config.get('ignore_emails', []): - return False - - # Check ignore_logins - extract potential login from contributor string - ignore_logins = self.config.get('ignore_logins', []) - for login in ignore_logins: - if login.lower() in contributor.lower(): - return False - - lower = contributor.lower() - if 'bot' in lower or 'dependabot' in lower: - return False - return True - - def get_pr_contributors(self) -> Set[str]: - if not (self.pr_base_sha and self.pr_head_sha): - print('PR SHAs not set; returning empty set') - return set() - out = self._run_git(['log', '--use-mailmap', '--format=%aN <%aE>', f'{self.pr_base_sha}..{self.pr_head_sha}']) - return {l.strip() for l in out.splitlines() if l.strip() and self.should_include_contributor(l.strip())} - - def get_all_contributors(self) -> Set[str]: - out = self._run_git(['log', '--use-mailmap', '--format=%aN <%aE>', '--all']) - return {l.strip() for l in out.splitlines() if l.strip() and self.should_include_contributor(l.strip())} - - def parse_citation_cff(self) -> Set[str]: - path = self.repo_root / 'CITATION.cff' - if not path.exists(): - return set() - try: - with open(path, 'r', encoding='utf-8') as f: - data = yaml.safe_load(f) or {} - authors = data.get('authors', []) - result = set() - for a in authors: - if isinstance(a, dict): - name = None - if 'given-names' in a and 'family-names' in a: - name = f"{a.get('given-names')} {a.get('family-names')}" - elif 'name' in a: - name = a.get('name') - if name: - email = a.get('email') - if email: - result.add(f"{name} <{email}>") - else: - result.add(name) - elif isinstance(a, str): - result.add(a) - return result - except Exception as 
e: - print(f"Failed to parse CITATION.cff: {e}") - return set() - - def parse_codemeta_json(self) -> Set[str]: - path = self.repo_root / 'codemeta.json' - if not path.exists(): - return set() - try: - with open(path, 'r', encoding='utf-8') as f: - data = json.load(f) - result = set() - for fld in ['author', 'contributor', 'maintainer']: - authors = data.get(fld, []) - if not isinstance(authors, list): - authors = [authors] - for a in authors: - if isinstance(a, dict): - name = a.get('name') or (a.get('givenName', '') + ' ' + a.get('familyName', '')).strip() - if name: - email = a.get('email') - if email: - result.add(f"{name} <{email}>") - else: - result.add(name) - elif isinstance(a, str): - result.add(a) - return result - except Exception as e: - print(f"Failed to parse codemeta.json: {e}") - return set() - - def normalize_contributor_name(self, s: str) -> str: - s = re.sub(r'<[^>]+>', '', s) - return ' '.join(s.split()).lower() - - def find_missing_contributors(self, pr_contribs: Set[str], metadata_contribs: Set[str]) -> Set[str]: - meta_norm = {self.normalize_contributor_name(m): m for m in metadata_contribs} - missing = set() - for p in pr_contribs: - if self.normalize_contributor_name(p) not in meta_norm: - missing.add(p) - return missing - - def create_comment_body(self, missing: Set[str]) -> str: - lines = '\n'.join(f"- {m}" for m in sorted(missing)) - return ("⚠️ **Metadata check: contributors missing from citation files**\n\n" - "The following contributors from this PR are not listed in the metadata files:\n\n" - f"{lines}\n\n" - "Next steps:\n- Add them to `CITATION.cff` / `codemeta.json` or update `.mailmap` if these are aliases.\n") - - def post_pr_comment(self, missing: Set[str]) -> bool: - if not requests: - print('requests not installed; skipping post') - return False - - print(f'GitHub token present: {bool(self.github_token)}') - print(f'GitHub repo: {self.github_repo}') - print(f'PR number: {self.pr_number}') - - if not (self.github_token and 
self.github_repo and self.pr_number): - print('Missing GitHub env variables; cannot post comment') - print('Required env vars: GITHUB_TOKEN, GITHUB_REPOSITORY, PR_NUMBER') - return False - - url = f"https://api.github.com/repos/{self.github_repo}/issues/{self.pr_number}/comments" - headers = {'Authorization': f'token {self.github_token}', 'Accept': 'application/vnd.github.v3+json'} - - print(f'Posting comment to: {url}') - - try: - r = requests.post(url, headers=headers, json={'body': self.create_comment_body(missing)}) - print(f'Response status: {r.status_code}') - if r.status_code != 201: - print(f'Response body: {r.text}') - r.raise_for_status() - print('Posted PR comment successfully') - return True - except Exception as e: - print(f'Failed to post PR comment: {e}') - if hasattr(e, 'response'): - print(f'Response status: {e.response.status_code}') - print(f'Response body: {e.response.text}') - return False - - def check_contributors(self) -> bool: - pr_contribs = self.get_pr_contributors() - print(f'Found {len(pr_contribs)} contributors in PR commits') - for c in sorted(pr_contribs): - print(f' - {c}') - - # Check each metadata file separately - citation_cff = self.parse_citation_cff() - codemeta_json = self.parse_codemeta_json() - - print('\nChecking CITATION.cff:') - if citation_cff: - print(f' Found {len(citation_cff)} contributors in CITATION.cff') - for c in sorted(citation_cff): - print(f' - {c}') - missing_citation = self.find_missing_contributors(pr_contribs, citation_cff) - if missing_citation: - print(f' Missing from CITATION.cff: {sorted(missing_citation)}') - else: - print(' All PR contributors present in CITATION.cff') - else: - print(' CITATION.cff not found or empty') - - print('\nChecking codemeta.json:') - if codemeta_json: - print(f' Found {len(codemeta_json)} contributors in codemeta.json') - for c in sorted(codemeta_json): - print(f' - {c}') - missing_codemeta = self.find_missing_contributors(pr_contribs, codemeta_json) - if 
missing_codemeta: - print(f' Missing from codemeta.json: {sorted(missing_codemeta)}') - else: - print(' All PR contributors present in codemeta.json') - else: - print(' codemeta.json not found or empty') - - # Overall check (union of both files) - metadata = citation_cff | codemeta_json - missing = self.find_missing_contributors(pr_contribs, metadata) - - print('\nOverall result:') - current_mode = self.config.get('mode', 'warn') - print(f'Running in mode: {current_mode}') - - if missing: - print(f'Missing contributors (not in any metadata file): {sorted(missing)}') - print('Attempting to post PR comment...') - comment_posted = self.post_pr_comment(missing) - if not comment_posted: - print('Failed to post PR comment - check GitHub token and permissions') - - # Only fail if mode is 'fail', otherwise just warn - if current_mode == 'fail': - print('Mode is "fail" - exiting with error code') - return False - else: - print('Mode is "warn" - posting warning but not failing') - return True - else: - print('All PR contributors present in at least one metadata file') - return True - - def check_all_contributors_in_metadata(self) -> bool: - allc = self.get_all_contributors() - print(f'Found {len(allc)} total contributors in repository') - for c in sorted(allc): - print(f' - {c}') - - # Check each metadata file separately - citation_cff = self.parse_citation_cff() - codemeta_json = self.parse_codemeta_json() - - print('\nChecking CITATION.cff:') - if citation_cff: - print(f' Found {len(citation_cff)} contributors in CITATION.cff') - for c in sorted(citation_cff): - print(f' - {c}') - missing_citation = self.find_missing_contributors(allc, citation_cff) - if missing_citation: - print(f' Missing from CITATION.cff: {sorted(missing_citation)}') - else: - print(' All repository contributors present in CITATION.cff') - else: - print(' CITATION.cff not found or empty') - - print('\nChecking codemeta.json:') - if codemeta_json: - print(f' Found {len(codemeta_json)} contributors in 
codemeta.json') - for c in sorted(codemeta_json): - print(f' - {c}') - missing_codemeta = self.find_missing_contributors(allc, codemeta_json) - if missing_codemeta: - print(f' Missing from codemeta.json: {sorted(missing_codemeta)}') - else: - print(' All repository contributors present in codemeta.json') - else: - print(' codemeta.json not found or empty') - - # Overall check (union of both files) - metadata = citation_cff | codemeta_json - missing = self.find_missing_contributors(allc, metadata) - - print('\nOverall result:') - if missing: - print(f'Missing contributors (not in any metadata file): {sorted(missing)}') - return False - print('All contributors present in at least one metadata file') - return True - - -def main() -> None: - checker = ContributorChecker() - test_mode = not all([checker.pr_base_sha, checker.pr_head_sha, checker.pr_number]) - try: - if test_mode: - print('Running in test mode') - ok = checker.check_all_contributors_in_metadata() - else: - print('Running in PR mode') - ok = checker.check_contributors() - sys.exit(0 if ok else 1) - except Exception as e: - print(f'Error: {e}') - sys.exit(1) - - -if __name__ == '__main__': - main() diff --git a/contrib_checker/__init__.py b/contrib_checker/__init__.py new file mode 100644 index 0000000..f5798a7 --- /dev/null +++ b/contrib_checker/__init__.py @@ -0,0 +1,19 @@ +""" +Contributor Checker - A library to check if contributors are properly listed in metadata files. + +This library provides functionality to check if all Git contributors are properly acknowledged +in CITATION.cff or codemeta.json files, with support for GitHub Actions and GitLab CI. 
def create_parser() -> argparse.ArgumentParser:
    """Build the ``contrib-checker`` argument parser.

    Returns:
        A parser exposing ``--repo-path``, ``--mode``, ``--ignore-emails``,
        ``--ignore-logins``, ``--from-sha``, ``--to-sha`` and ``--verbose``.
    """
    parser = argparse.ArgumentParser(
        description="Check if Git contributors are properly listed in metadata files",
        # Raw formatter keeps the hand-formatted examples in the epilog intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Check all contributors in current repository
  contrib-checker

  # Check contributors with specific mode
  contrib-checker --mode fail

  # Check contributors with ignore lists
  contrib-checker --ignore-emails bot@example.com --ignore-logins bot-user

  # Check specific commit range
  contrib-checker --from-sha abc123 --to-sha def456

  # Use specific repository path
  contrib-checker --repo-path /path/to/repo
        """
    )

    parser.add_argument(
        '--repo-path',
        type=Path,
        default=Path('.'),
        help='Path to the repository root (default: current directory)'
    )

    parser.add_argument(
        '--mode',
        choices=['warn', 'fail'],
        default='warn',
        help='Behavior mode: warn (default) or fail. In fail mode, exits with error code if contributors are missing'
    )

    # ``append`` leaves the attribute as None when the flag is never given;
    # main() converts that to an empty list.
    parser.add_argument(
        '--ignore-emails',
        action='append',
        help='Email addresses to ignore (can be used multiple times)'
    )

    parser.add_argument(
        '--ignore-logins',
        action='append',
        help='Login names to ignore (can be used multiple times)'
    )

    parser.add_argument(
        '--from-sha',
        help='Start commit SHA for range checking (requires --to-sha)'
    )

    parser.add_argument(
        '--to-sha',
        help='End commit SHA for range checking (requires --from-sha)'
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )

    return parser


def main(args: Optional[list] = None) -> int:
    """Run the command-line check and return a process exit code.

    Args:
        args: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success (always in ``warn`` mode), ``1`` when only one of
        ``--from-sha`` / ``--to-sha`` is given, when ``fail`` mode finds missing
        contributors, or when the check raises.
    """
    parser = create_parser()
    parsed_args = parser.parse_args(args)

    # Reject a half-specified range up front, before any checker is built or
    # git is invoked. Diagnostics go to stderr so they never mix with the report.
    range_requested = bool(parsed_args.from_sha and parsed_args.to_sha)
    if not range_requested and (parsed_args.from_sha or parsed_args.to_sha):
        print(
            'Error: Both --from-sha and --to-sha must be provided for range checking',
            file=sys.stderr
        )
        return 1

    # Build configuration
    config = {
        'mode': parsed_args.mode,
        'ignore_emails': parsed_args.ignore_emails or [],
        'ignore_logins': parsed_args.ignore_logins or []
    }

    # Initialize checker
    checker = ContributorChecker(
        repo_path=parsed_args.repo_path,
        config=config
    )

    try:
        if range_requested:
            if parsed_args.verbose:
                print(f'Checking contributors from {parsed_args.from_sha} to {parsed_args.to_sha}')
            success, _ = checker.check_range_contributors(
                parsed_args.from_sha,
                parsed_args.to_sha,
                "specified range"
            )
        else:
            if parsed_args.verbose:
                print('Checking all repository contributors')
            success, _ = checker.check_all_contributors()
    except Exception as e:
        # Top-level boundary: turn any failure into a non-zero exit code.
        print(f'Error: {e}', file=sys.stderr)
        if parsed_args.verbose:
            import traceback
            traceback.print_exc()
        return 1

    # In warn mode, always return 0
    # In fail mode, return 1 if there are missing contributors
    if parsed_args.mode == 'fail' and not success:
        return 1
    return 0
class ContributorChecker:
    """Platform-independent check of git contributors against metadata files.

    Contributors are ``Name <email>`` strings produced by
    ``git log --use-mailmap``; metadata entries come from ``CITATION.cff`` and
    ``codemeta.json`` in the repository root.
    """

    # Captures the address inside ``Name <email>``.
    _EMAIL_RE = re.compile(r'<([^>]+)>')
    # ``bot`` as a standalone token ("[bot]", "ci-bot", "bot@host"). A plain
    # substring test would also drop people such as "Abbott" or "Talbot".
    _BOT_TOKEN_RE = re.compile(r'(?<![a-z0-9])bot(?![a-z0-9])')

    def __init__(self, repo_path: Path = None, config: Dict[str, Any] = None):
        """Initialize the contributor checker.

        Args:
            repo_path: Path to the repository root. Defaults to current directory.
            config: Configuration dictionary with ignore lists and mode. A
                missing or empty value selects the defaults.
        """
        self.repo_path = repo_path or Path('.')
        self.config = config or self._get_default_config()

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration (warn mode, dependabot ignored)."""
        return {
            'mode': 'warn',
            'ignore_emails': ['dependabot[bot]@users.noreply.github.com'],
            'ignore_logins': ['dependabot[bot]']
        }

    def _run_git(self, args: list) -> str:
        """Run ``git`` with *args* in the repository and return its stdout.

        A non-zero exit status is reported on stdout and yields an empty
        string, so callers see "no contributors" rather than an exception.
        """
        try:
            res = subprocess.run(
                ['git'] + args,
                cwd=self.repo_path,
                capture_output=True,
                text=True,
                check=True
            )
            return res.stdout
        except subprocess.CalledProcessError as e:
            print(f"git failed: {' '.join(e.cmd)} -> {e.stderr}")
            return ''

    @classmethod
    def _extract_email(cls, entry: str) -> str:
        """Return the lower-cased address from ``Name <email>``, or ``''``."""
        match = cls._EMAIL_RE.search(entry)
        return match.group(1).strip().lower() if match else ''

    @staticmethod
    def _format_entry(name: str, email: Any) -> str:
        """Render a metadata person as ``Name <email>`` or just ``Name``."""
        return f"{name} <{email}>" if email else name

    def should_include_contributor(self, contributor: str) -> bool:
        """Decide whether *contributor* takes part in the check.

        Args:
            contributor: A ``Name <email>`` string.

        Returns:
            ``False`` when the email is in ``ignore_emails`` (case-insensitive),
            when any non-empty ``ignore_logins`` entry occurs in the string, or
            when the string looks like a bot account; ``True`` otherwise.
        """
        lowered = contributor.lower()

        # Check ignore_emails
        email = self._extract_email(contributor)
        if email:
            ignored_emails = {
                str(e).lower() for e in (self.config.get('ignore_emails') or []) if e
            }
            if email in ignored_emails:
                return False

        # Check ignore_logins; an empty entry would be a substring of every
        # contributor, so it is skipped instead of matching everything.
        for login in self.config.get('ignore_logins') or []:
            if login and str(login).lower() in lowered:
                return False

        # Built-in bot filtering
        if 'dependabot' in lowered or self._BOT_TOKEN_RE.search(lowered):
            return False
        return True

    def _contributors_from_log(self, log_output: str) -> Set[str]:
        """Turn ``git log`` output into the set of contributors to check."""
        stripped = (line.strip() for line in log_output.splitlines())
        return {
            line for line in stripped
            if line and self.should_include_contributor(line)
        }

    def get_contributors_from_range(self, base_sha: str, head_sha: str) -> Set[str]:
        """Get contributors from the commit range ``base_sha..head_sha``.

        Returns an empty set (with a notice) when either SHA is missing.
        """
        if not (base_sha and head_sha):
            print('Base and head SHAs not provided; returning empty set')
            return set()

        out = self._run_git([
            'log', '--use-mailmap', '--format=%aN <%aE>',
            f'{base_sha}..{head_sha}'
        ])
        return self._contributors_from_log(out)

    def get_all_contributors(self) -> Set[str]:
        """Get all contributors from repository history (``git log --all``)."""
        out = self._run_git(['log', '--use-mailmap', '--format=%aN <%aE>', '--all'])
        return self._contributors_from_log(out)

    def parse_citation_cff(self) -> Set[str]:
        """Parse ``CITATION.cff`` and extract its ``authors`` entries.

        Returns:
            ``Name <email>`` / ``Name`` strings; an empty set when the file is
            absent or cannot be parsed (the failure is printed).
        """
        path = self.repo_path / 'CITATION.cff'
        if not path.exists():
            return set()

        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f) or {}

            result = set()
            # ``authors:`` with no value loads as None; treat it as empty.
            for a in data.get('authors') or []:
                if isinstance(a, dict):
                    name = None
                    if 'given-names' in a and 'family-names' in a:
                        name = f"{a.get('given-names')} {a.get('family-names')}"
                    elif 'name' in a:
                        name = a.get('name')

                    if name:
                        result.add(self._format_entry(name, a.get('email')))
                elif isinstance(a, str):
                    result.add(a)

            return result
        except Exception as e:
            # Deliberate best-effort: unreadable metadata counts as "no entries".
            print(f"Failed to parse CITATION.cff: {e}")
            return set()

    def parse_codemeta_json(self) -> Set[str]:
        """Parse ``codemeta.json`` and extract author/contributor/maintainer entries.

        Returns:
            ``Name <email>`` / ``Name`` strings; an empty set when the file is
            absent or cannot be parsed (the failure is printed).
        """
        path = self.repo_path / 'codemeta.json'
        if not path.exists():
            return set()

        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            result = set()
            for field in ['author', 'contributor', 'maintainer']:
                people = data.get(field, [])
                # A single person may be given directly instead of as a list.
                if not isinstance(people, list):
                    people = [people]

                for a in people:
                    if isinstance(a, dict):
                        # ``or ''`` keeps a null givenName/familyName from
                        # aborting the whole parse with a TypeError.
                        name = a.get('name') or (
                            (a.get('givenName') or '') + ' ' + (a.get('familyName') or '')
                        ).strip()
                        if name:
                            result.add(self._format_entry(name, a.get('email')))
                    elif isinstance(a, str):
                        result.add(a)

            return result
        except Exception as e:
            # Deliberate best-effort: unreadable metadata counts as "no entries".
            print(f"Failed to parse codemeta.json: {e}")
            return set()

    def normalize_contributor_name(self, s: str) -> str:
        """Normalize a contributor for comparison.

        Drops any ``<...>`` part, collapses whitespace and lower-cases.
        """
        s = self._EMAIL_RE.sub('', s)
        return ' '.join(s.split()).lower()

    def find_missing_contributors(self,
                                  contributors: Set[str],
                                  metadata_contribs: Set[str]) -> Set[str]:
        """Find contributors missing from metadata.

        A contributor counts as listed when its normalized name equals a
        metadata name, or when its email equals a metadata email
        (case-insensitive), so one person under two display names is not
        reported.

        Args:
            contributors: ``Name <email>`` strings to look up.
            metadata_contribs: Entries extracted from the metadata files.

        Returns:
            The subset of *contributors* not found in *metadata_contribs*.
        """
        known_names = {self.normalize_contributor_name(m) for m in metadata_contribs}
        known_emails = {e for e in map(self._extract_email, metadata_contribs) if e}

        missing = set()
        for contrib in contributors:
            if self.normalize_contributor_name(contrib) in known_names:
                continue
            email = self._extract_email(contrib)
            if email and email in known_emails:
                continue
            missing.add(contrib)
        return missing

    def _report_metadata_file(self,
                              label: str,
                              listed: Set[str],
                              contributors: Set[str],
                              context_name: str) -> Set[str]:
        """Print the per-file section and return who is missing from that file."""
        print(f'\nChecking {label}:')
        if not listed:
            print(f' {label} not found or empty')
            # Nothing is listed, so every contributor is missing from this file.
            return contributors.copy()

        print(f' Found {len(listed)} contributors in {label}')
        for c in sorted(listed):
            print(f' - {c}')
        missing = self.find_missing_contributors(contributors, listed)
        if missing:
            print(f' Missing from {label}: {sorted(missing)}')
        else:
            print(f' All {context_name} contributors present in {label}')
        return missing

    def check_contributors_detailed(self,
                                    contributors: Set[str],
                                    context_name: str = "commits") -> Tuple[bool, Dict[str, Any]]:
        """Check contributors against metadata files with detailed results.

        Args:
            contributors: Set of contributor strings to check
            context_name: Description of the contributor context (e.g., "PR commits", "MR commits")

        Returns:
            Tuple of (success: bool, results: dict with detailed information).
            ``success`` is ``False`` only when contributors are missing from
            every metadata file and the configured mode is ``fail``.
        """
        print(f'Found {len(contributors)} contributors in {context_name}')
        for c in sorted(contributors):
            print(f' - {c}')

        # Check each metadata file separately
        citation_cff = self.parse_citation_cff()
        codemeta_json = self.parse_codemeta_json()
        missing_citation = self._report_metadata_file(
            'CITATION.cff', citation_cff, contributors, context_name)
        missing_codemeta = self._report_metadata_file(
            'codemeta.json', codemeta_json, contributors, context_name)

        # Overall check (union of both files)
        metadata = citation_cff | codemeta_json
        missing_overall = self.find_missing_contributors(contributors, metadata)

        print('\nOverall result:')
        current_mode = self.config.get('mode', 'warn')
        print(f'Running in mode: {current_mode}')

        # Prepare detailed results
        results = {
            'contributors': contributors,
            'citation_cff': citation_cff,
            'codemeta_json': codemeta_json,
            'missing_citation': missing_citation,
            'missing_codemeta': missing_codemeta,
            'missing_overall': missing_overall,
            'metadata_combined': metadata,
            'mode': current_mode,
            'context_name': context_name
        }

        if not missing_overall:
            print(f'All {context_name} contributors present in at least one metadata file')
            return True, results

        print(f'Missing contributors (not in any metadata file): {sorted(missing_overall)}')
        # Return success/failure based on mode
        success = current_mode != 'fail'
        if success:
            print('Mode is "warn" - posting warning but not failing')
        else:
            print('Mode is "fail" - check failed')
        return success, results

    def check_range_contributors(self, base_sha: str, head_sha: str,
                                 context_name: str = "range commits") -> Tuple[bool, Dict[str, Any]]:
        """Check contributors from the commit range ``base_sha..head_sha``."""
        contributors = self.get_contributors_from_range(base_sha, head_sha)
        return self.check_contributors_detailed(contributors, context_name)

    def check_all_contributors(self) -> Tuple[bool, Dict[str, Any]]:
        """Check all repository contributors."""
        contributors = self.get_all_contributors()
        return self.check_contributors_detailed(contributors, "repository history")
class GitHubContributorChecker:
    """GitHub-specific wrapper for ContributorChecker.

    Reads the pull-request context from environment variables, builds the
    configuration (optional YAML file overridden by action inputs) and
    delegates the actual comparison to the core checker.  When contributors
    are missing it tries to post a comment on the pull request.
    """

    # Seconds to wait for the GitHub API; without a timeout a stalled
    # connection would block the CI job until the runner kills it.
    REQUEST_TIMEOUT = 30

    def __init__(self, repo_path: Path = None) -> None:
        """Collect GitHub environment values and initialise the core checker.

        Args:
            repo_path: Repository root to inspect; defaults to the current
                working directory when omitted.
        """
        # GitHub environment variables
        self.github_token = os.environ.get('GITHUB_TOKEN')
        self.github_repo = os.environ.get('GITHUB_REPOSITORY')
        self.pr_number = os.environ.get('PR_NUMBER')
        self.pr_base_sha = os.environ.get('PR_BASE_SHA')
        self.pr_head_sha = os.environ.get('PR_HEAD_SHA')

        # Set default repo root
        if repo_path is None:
            repo_path = Path('.')

        # Load configuration
        self.config = self._load_config(repo_path)

        # Initialize core checker
        self.core_checker = ContributorChecker(
            repo_path=repo_path,
            config=self.config
        )

    @staticmethod
    def _split_csv(raw: str) -> list:
        """Split a comma-separated string, dropping blanks and padding."""
        return [item.strip() for item in raw.split(',') if item.strip()]

    def _load_config(self, repo_path: Path) -> Dict[str, Any]:
        """Load configuration from file and environment variables.

        Precedence (lowest to highest): built-in defaults, the
        ``.github/contrib-metadata-check.yml`` file under ``repo_path``,
        then the ``ACTION_MODE`` / ``ACTION_IGNORE_EMAILS`` /
        ``ACTION_IGNORE_LOGINS`` environment variables.

        Args:
            repo_path: Repository root containing the optional config file.

        Returns:
            The merged configuration dictionary.
        """
        cfg_path = repo_path / '.github' / 'contrib-metadata-check.yml'
        default = {
            'mode': 'warn',
            'ignore_emails': ['dependabot[bot]@users.noreply.github.com'],
            'ignore_logins': ['dependabot[bot]']
        }

        # Load from config file if it exists
        if cfg_path.exists():
            # Imported lazily: YAML support is only needed when a file exists.
            import yaml

            try:
                with open(cfg_path, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f) or {}
                if isinstance(data, dict):
                    default.update(data)
                else:
                    # A scalar or list at top level cannot be merged as options.
                    print(f"Warning loading config: expected a mapping, got {type(data).__name__}")
            except Exception as e:
                # Best effort: a broken config file must not abort the check.
                print(f"Warning loading config: {e}")

        # Override with action inputs if provided
        action_mode = os.environ.get('ACTION_MODE')
        if action_mode:
            default['mode'] = action_mode

        action_ignore_emails = os.environ.get('ACTION_IGNORE_EMAILS', '').strip()
        if action_ignore_emails:
            default['ignore_emails'] = self._split_csv(action_ignore_emails)

        action_ignore_logins = os.environ.get('ACTION_IGNORE_LOGINS', '').strip()
        if action_ignore_logins:
            default['ignore_logins'] = self._split_csv(action_ignore_logins)

        return default

    def post_pr_comment(self, missing_contributors) -> bool:
        """Post a comment on the GitHub pull request.

        Args:
            missing_contributors: Contributor strings to list in the comment.

        Returns:
            True when the comment was created, False when posting was skipped
            (no HTTP client, missing environment) or the request failed.
        """
        if not requests:
            print('requests not installed; skipping post')
            return False

        print(f'GitHub token present: {bool(self.github_token)}')
        print(f'GitHub repo: {self.github_repo}')
        print(f'PR number: {self.pr_number}')

        if not (self.github_token and self.github_repo and self.pr_number):
            print('Missing GitHub env variables; cannot post comment')
            print('Required env vars: GITHUB_TOKEN, GITHUB_REPOSITORY, PR_NUMBER')
            return False

        url = f"https://api.github.com/repos/{self.github_repo}/issues/{self.pr_number}/comments"
        headers = {
            'Authorization': f'token {self.github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }

        print(f'Posting comment to: {url}')

        try:
            comment_body = create_comment_body(missing_contributors, "PR")
            r = requests.post(
                url,
                headers=headers,
                json={'body': comment_body},
                timeout=self.REQUEST_TIMEOUT,
            )
            print(f'Response status: {r.status_code}')
            if r.status_code != 201:
                print(f'Response body: {r.text}')
            r.raise_for_status()
            print('Posted PR comment successfully')
            return True
        except Exception as e:
            # Posting is best effort; the check result must not depend on it.
            print(f'Failed to post PR comment: {e}')
            # Connection-level errors carry `response = None`, so the
            # attribute existing is not enough to dereference it.
            response = getattr(e, 'response', None)
            if response is not None:
                print(f'Response status: {response.status_code}')
                print(f'Response body: {response.text}')
            return False

    def check_pr_contributors(self) -> bool:
        """Check PR contributors against metadata files.

        Returns:
            The success flag reported by the core checker; a failed comment
            post does not change it.
        """
        success, results = self.core_checker.check_range_contributors(
            self.pr_base_sha,
            self.pr_head_sha,
            "PR commits"
        )

        # Post comment if there are missing contributors
        if results['missing_overall']:
            print('Attempting to post PR comment...')
            comment_posted = self.post_pr_comment(results['missing_overall'])
            if not comment_posted:
                print('Failed to post PR comment - check GitHub token and permissions')

        return success

    def check_all_contributors(self) -> bool:
        """Check all repository contributors against metadata files."""
        success, _ = self.core_checker.check_all_contributors()
        return success
class GitLabContributorChecker:
    """GitLab-specific wrapper for ContributorChecker.

    Reads the merge-request context from GitLab CI environment variables,
    builds the configuration from environment overrides and delegates the
    actual comparison to the core checker.  When contributors are missing it
    tries to post a note on the merge request.
    """

    # Seconds to wait for the GitLab API; without a timeout a stalled
    # connection would block the CI job until the runner kills it.
    REQUEST_TIMEOUT = 30

    def __init__(self, repo_path: Path = None) -> None:
        """Collect GitLab CI environment values and initialise the core checker.

        Args:
            repo_path: Repository root to inspect; defaults to the current
                working directory when omitted.
        """
        # GitLab CI environment variables
        self.gitlab_token = os.environ.get('GITLAB_TOKEN')
        self.project_id = os.environ.get('CI_PROJECT_ID')
        self.project_url = os.environ.get('CI_PROJECT_URL')
        self.mr_iid = os.environ.get('CI_MERGE_REQUEST_IID')
        self.target_branch_sha = os.environ.get('CI_MERGE_REQUEST_TARGET_BRANCH_SHA')
        self.source_branch_sha = os.environ.get('CI_COMMIT_SHA')
        self.gitlab_api_url = os.environ.get('CI_API_V4_URL', 'https://gitlab.com/api/v4')

        # Set default repo root
        if repo_path is None:
            repo_path = Path('.')

        # Load configuration
        self.config = self._load_config(repo_path)

        # Initialize core checker
        self.core_checker = ContributorChecker(
            repo_path=repo_path,
            config=self.config
        )

    @staticmethod
    def _split_csv(raw: str) -> list:
        """Split a comma-separated string, dropping blanks and padding."""
        return [item.strip() for item in raw.split(',') if item.strip()]

    def _load_config(self, repo_path: Path) -> Dict[str, Any]:
        """Load configuration from environment variables.

        ``MODE`` is accepted only when it is ``warn`` or ``fail``
        (case-insensitive); ``IGNORE_EMAILS`` and ``IGNORE_LOGINS`` are
        comma-separated lists that replace the defaults when non-empty.

        Args:
            repo_path: Accepted for parity with the constructor call; it is
                not read here because no config file is consulted.

        Returns:
            The configuration dictionary.
        """
        default = {
            'mode': 'warn',
            'ignore_emails': ['dependabot[bot]@users.noreply.github.com', 'noreply@gitlab.com'],
            'ignore_logins': ['dependabot[bot]', 'gitlab-bot']
        }

        # Override with environment variables
        mode = os.environ.get('MODE', '').strip().lower()
        if mode in ('warn', 'fail'):
            default['mode'] = mode

        ignore_emails = os.environ.get('IGNORE_EMAILS', '').strip()
        if ignore_emails:
            default['ignore_emails'] = self._split_csv(ignore_emails)

        ignore_logins = os.environ.get('IGNORE_LOGINS', '').strip()
        if ignore_logins:
            default['ignore_logins'] = self._split_csv(ignore_logins)

        return default

    def post_mr_comment(self, missing_contributors) -> bool:
        """Post a comment on the GitLab merge request.

        Args:
            missing_contributors: Contributor strings to list in the comment.

        Returns:
            True when the note was created, False when posting was skipped
            (no HTTP client, missing environment) or the request failed.
        """
        if not requests:
            print('requests not installed; skipping comment post')
            return False

        print(f'GitLab token present: {bool(self.gitlab_token)}')
        print(f'Project ID: {self.project_id}')
        print(f'MR IID: {self.mr_iid}')

        if not (self.gitlab_token and self.project_id and self.mr_iid):
            print('Missing GitLab env variables; cannot post comment')
            print('Required: GITLAB_TOKEN, CI_PROJECT_ID, CI_MERGE_REQUEST_IID')
            return False

        url = f"{self.gitlab_api_url}/projects/{self.project_id}/merge_requests/{self.mr_iid}/notes"
        headers = {
            'Authorization': f'Bearer {self.gitlab_token}',
            'Content-Type': 'application/json'
        }

        print(f'Posting comment to: {url}')

        try:
            comment_body = create_comment_body(missing_contributors, "MR")
            response = requests.post(
                url,
                headers=headers,
                json={'body': comment_body},
                timeout=self.REQUEST_TIMEOUT,
            )
            print(f'Response status: {response.status_code}')
            if response.status_code not in [200, 201]:
                print(f'Response body: {response.text}')
            response.raise_for_status()
            print('Posted MR comment successfully')
            return True
        except Exception as e:
            # Posting is best effort; the check result must not depend on it.
            print(f'Failed to post MR comment: {e}')
            # Connection-level errors carry `response = None`, so the
            # attribute existing is not enough to dereference it.
            failed_response = getattr(e, 'response', None)
            if failed_response is not None:
                print(f'Response status: {failed_response.status_code}')
                print(f'Response body: {failed_response.text}')
            return False

    def check_mr_contributors(self) -> bool:
        """Check MR contributors against metadata files.

        Returns:
            The success flag reported by the core checker; a failed comment
            post does not change it.
        """
        success, results = self.core_checker.check_range_contributors(
            self.target_branch_sha,
            self.source_branch_sha,
            "MR commits"
        )

        # Post comment if there are missing contributors
        if results['missing_overall']:
            print('Attempting to post MR comment...')
            comment_posted = self.post_mr_comment(results['missing_overall'])
            if not comment_posted:
                print('Failed to post MR comment - check GitLab token and permissions')

        return success

    def check_all_contributors(self) -> bool:
        """Check all repository contributors against metadata files."""
        success, _ = self.core_checker.check_all_contributors()
        return success
run the GitLab contributor checker.""" + checker = GitLabContributorChecker() + + # Determine if we're in MR mode or test mode + mr_mode = bool(checker.mr_iid and checker.target_branch_sha and checker.source_branch_sha) + + try: + if mr_mode: + print('Running in GitLab MR mode') + ok = checker.check_mr_contributors() + else: + print('Running in test mode (checking all contributors)') + ok = checker.check_all_contributors() + + sys.exit(0 if ok else 1) + except Exception as e: + print(f'Error: {e}') + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a24e909 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,75 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "contrib-checker" +version = "1.0.0" +description = "Check if code contributors are properly listed in metadata files such as CITATION.cff and codemeta.json based on the git history" +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name = "Thomas Vuillaume", email = "thomas.vuillaume@lapp.in2p3.fr"} +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Software Development :: Quality Assurance", + "Topic :: Software Development :: Version Control :: Git", + "Topic :: Documentation", +] +keywords = ["git", "contributors", "citation", "metadata", "github-actions", "gitlab-ci"] +requires-python = ">=3.8" +dependencies = [ + "pyyaml>=5.4.0", + "requests>=2.25.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=6.0", + "pytest-cov", + "black", + "isort", + "flake8", +] + +[project.urls] +Homepage = "https://github.com/vuillaut/contrib-checker" +Repository = "https://github.com/vuillaut/contrib-checker" +Issues 
= "https://github.com/vuillaut/contrib-checker/issues" + +[project.scripts] +contrib-checker = "contrib_checker.cli:main" + +[tool.setuptools.packages.find] +where = ["."] +include = ["contrib_checker*"] + +[tool.ruff] +exclude = [ + ".eggs", + ".git", + ".pytest_cache", + ".ruff_cache", + ".vscode", + "__pypackages__", + "_build", + "build", + "dist", + "node_modules", + "site-packages", +] +line-length = 99 +lint.select = ["E4", "E7", "E9", "F"] +lint.ignore = [] +lint.fixable = ["ALL"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..96a0bf3 --- /dev/null +++ b/setup.py @@ -0,0 +1,6 @@ +"""Setup script for contrib-checker package.""" + +from setuptools import setup + +# Use pyproject.toml for configuration +setup() diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..fbef04b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,81 @@ +""" +Pytest configuration and fixtures for contrib-checker tests. +""" + +import pytest +import tempfile +import shutil +from pathlib import Path + + +@pytest.fixture +def temp_repo(): + """Create a temporary directory for testing.""" + temp_dir = Path(tempfile.mkdtemp()) + yield temp_dir + shutil.rmtree(temp_dir, ignore_errors=True) + + +@pytest.fixture +def sample_citation_cff(): + """Sample CITATION.cff content for testing.""" + return """ +cff-version: 1.2.0 +title: "Test Project" +message: "If you use this software, please cite it as below." 
+authors: + - family-names: "Doe" + given-names: "John" + email: "john@example.com" + orcid: "https://orcid.org/0000-0000-0000-0000" + - family-names: "Smith" + given-names: "Jane" + email: "jane@example.com" + - name: "Bot User" + email: "bot@example.com" +""" + + +@pytest.fixture +def sample_codemeta_json(): + """Sample codemeta.json content for testing.""" + return """ +{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "@type": "SoftwareSourceCode", + "name": "Test Project", + "author": [ + { + "@type": "Person", + "givenName": "John", + "familyName": "Doe", + "email": "john@example.com" + }, + { + "@type": "Person", + "givenName": "Jane", + "familyName": "Smith", + "email": "jane@example.com" + } + ], + "contributor": [ + { + "@type": "Person", + "givenName": "Bob", + "familyName": "Wilson", + "email": "bob@example.com" + } + ] +} +""" + + +@pytest.fixture +def sample_mailmap(): + """Sample .mailmap content for testing.""" + return """ +# Map multiple emails to canonical names +John Doe +John Doe +Jane Smith +""" diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..68c792d --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,205 @@ +""" +Unit tests for the CLI functionality. +""" + +from io import StringIO +from unittest.mock import patch + +from contrib_checker.cli import create_parser, main + + +class TestCLI: + """Test CLI functionality.""" + + def test_create_parser(self): + """Test argument parser creation.""" + parser = create_parser() + + # Test default values + args = parser.parse_args([]) + assert args.mode == 'warn' + assert str(args.repo_path) == '.' 
+ assert args.ignore_emails is None + assert args.ignore_logins is None + assert args.from_sha is None + assert args.to_sha is None + assert args.verbose is False + + def test_parser_with_arguments(self): + """Test parser with various arguments.""" + parser = create_parser() + + args = parser.parse_args([ + '--repo-path', '/tmp/test', + '--mode', 'fail', + '--ignore-emails', 'bot1@example.com', + '--ignore-emails', 'bot2@example.com', + '--ignore-logins', 'bot1', + '--ignore-logins', 'bot2', + '--from-sha', 'abc123', + '--to-sha', 'def456', + '--verbose' + ]) + + assert str(args.repo_path) == '/tmp/test' + assert args.mode == 'fail' + assert args.ignore_emails == ['bot1@example.com', 'bot2@example.com'] + assert args.ignore_logins == ['bot1', 'bot2'] + assert args.from_sha == 'abc123' + assert args.to_sha == 'def456' + assert args.verbose is True + + def test_help_output(self): + """Test help output.""" + parser = create_parser() + + with patch('sys.stderr', new_callable=StringIO): + try: + parser.parse_args(['--help']) + except SystemExit as e: + assert e.code == 0 + + # Help should be printed to stderr (by argparse) + # We can't easily capture it, but we can test that --help doesn't crash + + def test_main_help(self): + """Test main function with help.""" + with patch('sys.exit') as mock_exit: + main(['--help']) + mock_exit.assert_called_once_with(0) + + def test_main_basic_usage(self, temp_repo): + """Test main function basic usage.""" + # Create a simple CITATION.cff file + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(""" +cff-version: 1.2.0 +title: "Test Project" +authors: + - family-names: "Doe" + given-names: "John" +""") + + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = main([ + '--repo-path', str(temp_repo), + '--mode', 'warn' + ]) + + assert result == 0 + mock_check.assert_called_once() + + def 
test_main_fail_mode_with_missing_contributors(self, temp_repo): + """Test main function in fail mode with missing contributors.""" + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as mock_check: + mock_check.return_value = (False, {'missing_overall': ['Missing Person']}) + + result = main([ + '--repo-path', str(temp_repo), + '--mode', 'fail' + ]) + + assert result == 1 + + def test_main_warn_mode_with_missing_contributors(self, temp_repo): + """Test main function in warn mode with missing contributors.""" + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as mock_check: + mock_check.return_value = (False, {'missing_overall': ['Missing Person']}) + + result = main([ + '--repo-path', str(temp_repo), + '--mode', 'warn' + ]) + + # Warn mode should return 0 even with missing contributors + assert result == 0 + + def test_main_range_checking(self, temp_repo): + """Test main function with range checking.""" + with patch('contrib_checker.core.ContributorChecker.check_range_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = main([ + '--repo-path', str(temp_repo), + '--from-sha', 'abc123', + '--to-sha', 'def456', + '--verbose' + ]) + + assert result == 0 + mock_check.assert_called_once_with('abc123', 'def456', 'specified range') + + def test_main_range_checking_incomplete_args(self, temp_repo): + """Test main function with incomplete range arguments.""" + # Only from-sha provided + result = main([ + '--repo-path', str(temp_repo), + '--from-sha', 'abc123' + ]) + + assert result == 1 + + # Only to-sha provided + result = main([ + '--repo-path', str(temp_repo), + '--to-sha', 'def456' + ]) + + assert result == 1 + + def test_main_with_ignore_options(self, temp_repo): + """Test main function with ignore options.""" + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + 
with patch('contrib_checker.core.ContributorChecker.__init__') as mock_init: + mock_init.return_value = None + + main([ + '--repo-path', str(temp_repo), + '--ignore-emails', 'bot1@example.com', + '--ignore-emails', 'bot2@example.com', + '--ignore-logins', 'bot1', + '--ignore-logins', 'bot2' + ]) + + # Check that ContributorChecker was initialized with correct config + mock_init.assert_called_once() + args, kwargs = mock_init.call_args + config = kwargs['config'] + assert config['ignore_emails'] == ['bot1@example.com', 'bot2@example.com'] + assert config['ignore_logins'] == ['bot1', 'bot2'] + + def test_main_exception_handling(self, temp_repo): + """Test main function exception handling.""" + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as mock_check: + mock_check.side_effect = Exception('Test error') + + result = main([ + '--repo-path', str(temp_repo) + ]) + + assert result == 1 + + def test_main_exception_handling_verbose(self, temp_repo): + """Test main function exception handling with verbose output.""" + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as mock_check: + mock_check.side_effect = Exception('Test error') + + with patch('traceback.print_exc') as mock_traceback: + result = main([ + '--repo-path', str(temp_repo), + '--verbose' + ]) + + assert result == 1 + mock_traceback.assert_called_once() + + def test_main_as_module(self): + """Test that main can be called without arguments (uses sys.argv).""" + with patch('sys.argv', ['contrib-checker', '--help']): + with patch('sys.exit') as mock_exit: + main() + mock_exit.assert_called_once_with(0) diff --git a/tests/test_contrib_checker.py b/tests/test_contrib_checker.py new file mode 100644 index 0000000..51df0ff --- /dev/null +++ b/tests/test_contrib_checker.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +""" +Comprehensive test suite for contrib-checker package. + +Tests all modules: core, github, gitlab, and cli. 
+""" + +import pytest +import sys +import os +import tempfile +import subprocess +from pathlib import Path +from unittest.mock import Mock, patch + +# Add the package to the path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from contrib_checker import ContributorChecker, GitHubContributorChecker, GitLabContributorChecker +from contrib_checker.cli import main as cli_main + + +class TestContributorChecker: + """Test the core ContributorChecker functionality.""" + + def setup_method(self): + """Set up test fixtures.""" + self.temp_dir = Path(tempfile.mkdtemp()) + self.config = { + 'mode': 'warn', + 'ignore_emails': ['bot@example.com'], + 'ignore_logins': ['test-bot'] + } + + def teardown_method(self): + """Clean up test fixtures.""" + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_initialization(self): + """Test ContributorChecker initialization.""" + checker = ContributorChecker(repo_path=self.temp_dir, config=self.config) + assert checker.repo_path == self.temp_dir + assert checker.config == self.config + + def test_normalize_contributor_name(self): + """Test contributor name normalization.""" + checker = ContributorChecker(repo_path=self.temp_dir, config=self.config) + + test_cases = [ + ("John Doe ", "john doe"), + (" JANE DOE ", "jane doe"), + ("Bob Smith ", "bob smith"), + ("Alice-Jane Wilson", "alice-jane wilson"), + ("测试用户 ", "测试用户"), + ] + + for input_name, expected in test_cases: + result = checker.normalize_contributor_name(input_name) + assert result == expected, f"Expected '{expected}', got '{result}' for input '{input_name}'" + + def test_parse_citation_cff(self): + """Test CITATION.cff parsing.""" + # Create a test CITATION.cff file + citation_content = """ +cff-version: 1.2.0 +title: "Test Project" +authors: + - family-names: "Doe" + given-names: "John" + email: "john@example.com" + - family-names: "Smith" + given-names: "Jane" + email: "jane@example.com" +""" + citation_file = self.temp_dir / "CITATION.cff" 
+ citation_file.write_text(citation_content) + + checker = ContributorChecker(repo_path=self.temp_dir, config=self.config) + contributors = checker.parse_citation_cff() + + assert len(contributors) == 2 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + + def test_parse_codemeta_json(self): + """Test codemeta.json parsing.""" + # Create a test codemeta.json file + codemeta_content = """ +{ + "author": [ + { + "givenName": "John", + "familyName": "Doe", + "email": "john@example.com" + }, + { + "givenName": "Jane", + "familyName": "Smith", + "email": "jane@example.com" + } + ] +} +""" + codemeta_file = self.temp_dir / "codemeta.json" + codemeta_file.write_text(codemeta_content) + + checker = ContributorChecker(repo_path=self.temp_dir, config=self.config) + contributors = checker.parse_codemeta_json() + + assert len(contributors) == 2 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + + @patch('subprocess.run') + def test_get_contributors_from_range(self, mock_run): + """Test getting contributors from git range.""" + # Mock git log output + mock_run.return_value = Mock( + returncode=0, + stdout="John Doe \nJane Smith \n" + ) + + checker = ContributorChecker(repo_path=self.temp_dir, config=self.config) + contributors = checker.get_contributors_from_range("abc123", "def456") + + assert len(contributors) == 2 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + + # Verify git command was called correctly + mock_run.assert_called_once() + args = mock_run.call_args[0][0] + assert "git" in args + assert "log" in args + assert "--use-mailmap" in args + assert "abc123..def456" in args + + +class TestGitHubContributorChecker: + """Test GitHub-specific functionality.""" + + def setup_method(self): + """Set up test fixtures.""" + self.temp_dir = Path(tempfile.mkdtemp()) + + # Mock GitHub environment variables + self.env_patcher = patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 
'GITHUB_REPOSITORY': 'test/repo', + 'PR_NUMBER': '123', + 'PR_BASE_SHA': 'abc123', + 'PR_HEAD_SHA': 'def456', + 'ACTION_MODE': 'warn' + }) + self.env_patcher.start() + + def teardown_method(self): + """Clean up test fixtures.""" + self.env_patcher.stop() + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_initialization(self): + """Test GitHubContributorChecker initialization.""" + checker = GitHubContributorChecker(repo_path=self.temp_dir) + + assert checker.github_token == 'test-token' + assert checker.github_repo == 'test/repo' + assert checker.pr_number == '123' + assert checker.pr_base_sha == 'abc123' + assert checker.pr_head_sha == 'def456' + + def test_load_config_from_env(self): + """Test configuration loading from environment.""" + with patch.dict(os.environ, { + 'ACTION_IGNORE_EMAILS': 'bot1@example.com,bot2@example.com', + 'ACTION_IGNORE_LOGINS': 'bot1,bot2' + }): + checker = GitHubContributorChecker(repo_path=self.temp_dir) + + assert checker.config['ignore_emails'] == ['bot1@example.com', 'bot2@example.com'] + assert checker.config['ignore_logins'] == ['bot1', 'bot2'] + + @patch('requests.post') + def test_post_pr_comment(self, mock_post): + """Test posting PR comments.""" + mock_post.return_value = Mock(status_code=201) + + checker = GitHubContributorChecker(repo_path=self.temp_dir) + missing_contributors = ['John Doe', 'Jane Smith'] + + result = checker.post_pr_comment(missing_contributors) + + assert result is True + mock_post.assert_called_once() + + # Check the API call + call_args = mock_post.call_args + assert 'https://api.github.com/repos/test/repo/issues/123/comments' == call_args[0][0] + assert 'Authorization' in call_args[1]['headers'] + + +class TestGitLabContributorChecker: + """Test GitLab-specific functionality.""" + + def setup_method(self): + """Set up test fixtures.""" + self.temp_dir = Path(tempfile.mkdtemp()) + + # Mock GitLab environment variables + self.env_patcher = patch.dict(os.environ, { + 
'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_MERGE_REQUEST_IID': '456', + 'CI_MERGE_REQUEST_TARGET_BRANCH_SHA': 'abc123', + 'CI_COMMIT_SHA': 'def456', + 'MODE': 'warn' + }) + self.env_patcher.start() + + def teardown_method(self): + """Clean up test fixtures.""" + self.env_patcher.stop() + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_initialization(self): + """Test GitLabContributorChecker initialization.""" + checker = GitLabContributorChecker(repo_path=self.temp_dir) + + assert checker.gitlab_token == 'test-token' + assert checker.project_id == '123' + assert checker.mr_iid == '456' + assert checker.target_branch_sha == 'abc123' + assert checker.source_branch_sha == 'def456' + + @patch('requests.post') + def test_post_mr_comment(self, mock_post): + """Test posting MR comments.""" + mock_post.return_value = Mock(status_code=201) + + checker = GitLabContributorChecker(repo_path=self.temp_dir) + missing_contributors = ['John Doe', 'Jane Smith'] + + result = checker.post_mr_comment(missing_contributors) + + assert result is True + mock_post.assert_called_once() + + +class TestCLI: + """Test the CLI functionality.""" + + def setup_method(self): + """Set up test fixtures.""" + self.temp_dir = Path(tempfile.mkdtemp()) + + def teardown_method(self): + """Clean up test fixtures.""" + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_cli_help(self): + """Test CLI help output.""" + with pytest.raises(SystemExit) as exc_info: + cli_main(['--help']) + assert exc_info.value.code == 0 + + def test_cli_basic_usage(self): + """Test basic CLI usage.""" + # Create minimal test repository structure + citation_content = """ +cff-version: 1.2.0 +title: "Test Project" +authors: + - family-names: "Doe" + given-names: "John" +""" + citation_file = self.temp_dir / "CITATION.cff" + citation_file.write_text(citation_content) + + with patch('contrib_checker.core.ContributorChecker.check_all_contributors') as 
mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = cli_main([ + '--repo-path', str(self.temp_dir), + '--mode', 'warn' + ]) + + assert result == 0 + + +class TestIntegration: + """Integration tests for the full package.""" + + def test_package_imports(self): + """Test that all package components can be imported.""" + from contrib_checker import ContributorChecker + from contrib_checker import GitHubContributorChecker + from contrib_checker import GitLabContributorChecker + from contrib_checker.cli import main + + assert ContributorChecker is not None + assert GitHubContributorChecker is not None + assert GitLabContributorChecker is not None + assert main is not None + + def test_module_execution(self): + """Test that modules can be executed.""" + # Test GitHub module + result = subprocess.run([ + sys.executable, '-m', 'contrib_checker.github' + ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) + + # Should exit with some code (0 or 1) but not crash + assert result.returncode in [0, 1] + + # Test GitLab module + result = subprocess.run([ + sys.executable, '-m', 'contrib_checker.gitlab' + ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) + + # Should exit with some code (0 or 1) but not crash + assert result.returncode in [0, 1] + + def test_cli_execution(self): + """Test CLI execution.""" + result = subprocess.run([ + sys.executable, '-m', 'contrib_checker.cli', '--help' + ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) + + assert result.returncode == 0 + assert 'usage:' in result.stdout.lower() + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) diff --git a/tests/test_contributor_check.py b/tests/test_contributor_check.py deleted file mode 100644 index 7e74c07..0000000 --- a/tests/test_contributor_check.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -"""Light test runner for contributor checker parsing functions. 
- -Place this file in `.github` and run locally to exercise parsing and normalization -without reaching out to the GitHub API. -""" - -import sys -import os -from pathlib import Path - -# Ensure the script directory is importable -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from check_contributors import ContributorChecker - - -def test_citation_parsing(): - print("Testing CITATION.cff parsing...") - - os.environ.update({ - 'GITHUB_TOKEN': 'test-token', - 'GITHUB_REPOSITORY': 'test/repo', - 'PR_NUMBER': '1', - 'PR_BASE_SHA': 'base-sha', - 'PR_HEAD_SHA': 'head-sha' - }) - - checker = ContributorChecker() - print(f"Config loaded: {checker.config}") - - citation_contributors = checker.parse_citation_cff() - print(f"Found {len(citation_contributors)} contributors in CITATION.cff:") - for contrib in sorted(citation_contributors): - print(f" - {contrib}") - - codemeta_contributors = checker.parse_codemeta_json() - print(f"Found {len(codemeta_contributors)} contributors in codemeta.json:") - for contrib in sorted(codemeta_contributors): - print(f" - {contrib}") - - test_contributors = [ - "Thomas Vuillaume ", - "Thomas Vuillaume ", - "THOMAS VUILLAUME" - ] - - print("\nTesting name normalization:") - for contrib in test_contributors: - normalized = checker.normalize_contributor_name(contrib) - print(f" '{contrib}' -> '{normalized}'") - - return True - - -if __name__ == '__main__': - try: - test_citation_parsing() - print("\n✅ Test run complete") - except Exception as e: - print(f"\n❌ Test failed: {e}") - import traceback - traceback.print_exc() - sys.exit(1) diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..8d9169b --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,205 @@ +""" +Unit tests for the core ContributorChecker functionality. 
+""" + +from unittest.mock import Mock, patch + +from contrib_checker.core import ContributorChecker + + +class TestContributorCheckerCore: + """Test core functionality with realistic scenarios.""" + + def test_parse_citation_cff_comprehensive(self, temp_repo, sample_citation_cff): + """Test comprehensive CITATION.cff parsing.""" + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + contributors = checker.parse_citation_cff() + + # Should find all authors with email format + assert len(contributors) == 3 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + assert "Bot User " in contributors + + def test_parse_codemeta_json_comprehensive(self, temp_repo, sample_codemeta_json): + """Test comprehensive codemeta.json parsing.""" + codemeta_file = temp_repo / "codemeta.json" + codemeta_file.write_text(sample_codemeta_json) + + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + contributors = checker.parse_codemeta_json() + + # Should find authors and contributors with email format + assert len(contributors) == 3 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + assert "Bob Wilson " in contributors + + def test_ignore_patterns(self, temp_repo, sample_citation_cff): + """Test that ignore patterns work correctly.""" + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + config = { + 'mode': 'warn', + 'ignore_emails': ['bot@example.com'], + 'ignore_logins': ['bot-user'] + } + checker = ContributorChecker(repo_path=temp_repo, config=config) + + # Test the filtering logic directly + test_contributors = [ + "John Doe ", + "Jane Smith ", + "Bot User ", # Should be ignored due to email + "dependabot[bot] ", # Should be 
ignored due to built-in pattern + "github-actions[bot] " # Should be ignored due to built-in pattern + ] + + # Test each contributor individually + assert checker.should_include_contributor("John Doe ") + assert checker.should_include_contributor("Jane Smith ") + assert not checker.should_include_contributor("Bot User ") # ignored email + assert not checker.should_include_contributor("dependabot[bot] ") # built-in bot + assert not checker.should_include_contributor("github-actions[bot] ") # built-in bot + + # Test with actual range check by mocking git output instead + with patch('subprocess.run') as mock_run: + mock_run.return_value = Mock( + returncode=0, + stdout="\n".join(test_contributors) + "\n" + ) + + contributors = checker.get_contributors_from_range("abc123", "def456") + + # Should only include non-ignored contributors + assert len(contributors) == 2 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + assert "Bot User " not in contributors + assert "dependabot[bot] " not in contributors + assert "github-actions[bot] " not in contributors + + @patch('subprocess.run') + def test_git_mailmap_usage(self, mock_run, temp_repo, sample_mailmap): + """Test that git mailmap is used correctly.""" + mailmap_file = temp_repo / ".mailmap" + mailmap_file.write_text(sample_mailmap) + + mock_run.return_value = Mock( + returncode=0, + stdout="John Doe \nJane Smith \n" + ) + + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + contributors = checker.get_contributors_from_range("abc123", "def456") + + # Verify git command includes --use-mailmap + mock_run.assert_called_once() + args = mock_run.call_args[0][0] + assert "--use-mailmap" in args + + assert len(contributors) == 2 + assert "John Doe " in contributors + assert "Jane Smith " in contributors + + def test_name_normalization_edge_cases(self, temp_repo): + """Test name normalization with edge cases.""" + 
config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + test_cases = [ + # Input, Expected output + ("John Doe ", "john doe"), + (" John Doe ", "john doe"), + ("JOHN DOE", "john doe"), + ("john doe", "john doe"), + ("Jean-Claude Van Damme", "jean-claude van damme"), + ("O'Connor, Mary", "o'connor, mary"), + ("José García", "josé garcía"), + ("李小明", "李小明"), + ("", ""), + (" ", ""), + ("No-Email Person", "no-email person"), + ("Person With Numbers123", "person with numbers123"), + ] + + for input_name, expected in test_cases: + result = checker.normalize_contributor_name(input_name) + assert result == expected, f"Failed for '{input_name}': expected '{expected}', got '{result}'" + + def test_check_all_contributors_integration(self, temp_repo, sample_citation_cff): + """Test full integration of checking all contributors.""" + # Create citation file + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + # Mock git log to return some contributors + with patch('subprocess.run') as mock_run: + mock_run.return_value = Mock( + returncode=0, + stdout="John Doe \nMissing Person \n" + ) + + success, results = checker.check_all_contributors() + + # John Doe should be found, Missing Person should not + assert len(results['missing_overall']) == 1 + assert "Missing Person " in results['missing_overall'] + + # In warn mode, should still return success + assert success is True + + def test_check_range_contributors_fail_mode(self, temp_repo): + """Test that fail mode works correctly.""" + config = {'mode': 'fail', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + # Mock git log to return contributors not in any metadata file + with patch('subprocess.run') as 
mock_run: + mock_run.return_value = Mock( + returncode=0, + stdout="Missing Person \n" + ) + + success, results = checker.check_range_contributors( + "abc123", "def456", "test commits" + ) + + # Should fail because contributor is missing and mode is 'fail' + assert success is False + assert len(results['missing_overall']) == 1 + + def test_empty_metadata_files(self, temp_repo): + """Test behavior with empty or missing metadata files.""" + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + # No metadata files exist + citation_contributors = checker.parse_citation_cff() + codemeta_contributors = checker.parse_codemeta_json() + + assert len(citation_contributors) == 0 + assert len(codemeta_contributors) == 0 + + # Create empty files + (temp_repo / "CITATION.cff").write_text("") + (temp_repo / "codemeta.json").write_text("{}") + + citation_contributors = checker.parse_citation_cff() + codemeta_contributors = checker.parse_codemeta_json() + + assert len(citation_contributors) == 0 + assert len(codemeta_contributors) == 0 diff --git a/tests/test_github.py b/tests/test_github.py new file mode 100644 index 0000000..da2870f --- /dev/null +++ b/tests/test_github.py @@ -0,0 +1,217 @@ +""" +Unit tests for GitHub-specific functionality. 
+""" + +import os +from unittest.mock import Mock, patch + +from contrib_checker.github import GitHubContributorChecker + + +class TestGitHubContributorChecker: + """Test GitHub-specific functionality.""" + + def test_initialization_with_env_vars(self, temp_repo): + """Test initialization with GitHub environment variables.""" + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123', + 'PR_BASE_SHA': 'base-sha', + 'PR_HEAD_SHA': 'head-sha', + 'ACTION_MODE': 'fail', + 'ACTION_IGNORE_EMAILS': 'bot1@example.com,bot2@example.com', + 'ACTION_IGNORE_LOGINS': 'bot1,bot2' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + assert checker.github_token == 'test-token' + assert checker.github_repo == 'owner/repo' + assert checker.pr_number == '123' + assert checker.pr_base_sha == 'base-sha' + assert checker.pr_head_sha == 'head-sha' + assert checker.config['mode'] == 'fail' + assert checker.config['ignore_emails'] == ['bot1@example.com', 'bot2@example.com'] + assert checker.config['ignore_logins'] == ['bot1', 'bot2'] + + def test_load_config_from_file(self, temp_repo): + """Test loading configuration from .github/contrib-metadata-check.yml.""" + # Create config file + config_dir = temp_repo / '.github' + config_dir.mkdir() + config_file = config_dir / 'contrib-metadata-check.yml' + config_content = """ +mode: fail +ignore_emails: + - config-bot@example.com +ignore_logins: + - config-bot +""" + config_file.write_text(config_content) + + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + assert checker.config['mode'] == 'fail' + assert 'config-bot@example.com' in checker.config['ignore_emails'] + assert 'config-bot' in checker.config['ignore_logins'] + + def test_config_env_override_file(self, temp_repo): + """Test that environment variables override config file.""" + # Create config 
file with some settings + config_dir = temp_repo / '.github' + config_dir.mkdir() + config_file = config_dir / 'contrib-metadata-check.yml' + config_content = """ +mode: warn +ignore_emails: + - file-bot@example.com +""" + config_file.write_text(config_content) + + # Set environment variables that should override + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'ACTION_MODE': 'fail', + 'ACTION_IGNORE_EMAILS': 'env-bot@example.com' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + # Environment should override file + assert checker.config['mode'] == 'fail' + assert checker.config['ignore_emails'] == ['env-bot@example.com'] + + @patch('requests.post') + def test_post_pr_comment_success(self, mock_post, temp_repo): + """Test successful PR comment posting.""" + mock_post.return_value = Mock(status_code=201) + + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe ', 'Jane Smith '] + + result = checker.post_pr_comment(missing_contributors) + + assert result is True + mock_post.assert_called_once() + + # Verify API call details + call_args = mock_post.call_args + assert call_args[0][0] == 'https://api.github.com/repos/owner/repo/issues/123/comments' + assert 'Authorization' in call_args[1]['headers'] + assert call_args[1]['headers']['Authorization'] == 'token test-token' + assert 'body' in call_args[1]['json'] + + @patch('requests.post') + def test_post_pr_comment_failure(self, mock_post, temp_repo): + """Test PR comment posting failure.""" + mock_post.return_value = Mock(status_code=403, text='Forbidden') + mock_post.return_value.raise_for_status.side_effect = Exception('HTTP 403') + + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123' + }): + checker = 
GitHubContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe '] + + result = checker.post_pr_comment(missing_contributors) + + assert result is False + + def test_post_pr_comment_missing_env(self, temp_repo): + """Test PR comment posting with missing environment variables.""" + # Missing some required environment variables + with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}, clear=True): + checker = GitHubContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe '] + + result = checker.post_pr_comment(missing_contributors) + + assert result is False + + @patch('requests.post', side_effect=ImportError()) + def test_post_pr_comment_no_requests(self, mock_post, temp_repo): + """Test PR comment posting when requests is not available.""" + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123' + }): + # Mock requests not being available + with patch('contrib_checker.github.requests', None): + checker = GitHubContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe '] + + result = checker.post_pr_comment(missing_contributors) + + assert result is False + + def test_check_pr_contributors(self, temp_repo): + """Test checking PR contributors.""" + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123', + 'PR_BASE_SHA': 'base-sha', + 'PR_HEAD_SHA': 'head-sha' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + # Mock the core checker + with patch.object(checker.core_checker, 'check_range_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = checker.check_pr_contributors() + + assert result is True + mock_check.assert_called_once_with('base-sha', 'head-sha', 'PR commits') + + def test_check_pr_contributors_with_missing(self, temp_repo): + """Test checking PR contributors with missing contributors.""" + with 
patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123', + 'PR_BASE_SHA': 'base-sha', + 'PR_HEAD_SHA': 'head-sha' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + # Mock the core checker to return missing contributors + missing_contributors = ['John Doe '] + with patch.object(checker.core_checker, 'check_range_contributors') as mock_check: + mock_check.return_value = (False, {'missing_overall': missing_contributors}) + + # Mock comment posting + with patch.object(checker, 'post_pr_comment', return_value=True) as mock_comment: + result = checker.check_pr_contributors() + + assert result is False + mock_comment.assert_called_once_with(missing_contributors) + + def test_check_all_contributors(self, temp_repo): + """Test checking all repository contributors.""" + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + # Mock the core checker + with patch.object(checker.core_checker, 'check_all_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = checker.check_all_contributors() + + assert result is True + mock_check.assert_called_once() diff --git a/tests/test_gitlab.py b/tests/test_gitlab.py new file mode 100644 index 0000000..97e8152 --- /dev/null +++ b/tests/test_gitlab.py @@ -0,0 +1,224 @@ +""" +Unit tests for GitLab-specific functionality. 
+""" + +import os +from unittest.mock import Mock, patch + +from contrib_checker.gitlab import GitLabContributorChecker + + +class TestGitLabContributorChecker: + """Test GitLab-specific functionality.""" + + def test_initialization_with_env_vars(self, temp_repo): + """Test initialization with GitLab environment variables.""" + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_PROJECT_URL': 'https://gitlab.com/owner/repo', + 'CI_MERGE_REQUEST_IID': '456', + 'CI_MERGE_REQUEST_TARGET_BRANCH_SHA': 'target-sha', + 'CI_COMMIT_SHA': 'source-sha', + 'CI_API_V4_URL': 'https://gitlab.com/api/v4', + 'MODE': 'fail', + 'IGNORE_EMAILS': 'bot1@example.com,bot2@example.com', + 'IGNORE_LOGINS': 'bot1,bot2' + }): + checker = GitLabContributorChecker(repo_path=temp_repo) + + assert checker.gitlab_token == 'test-token' + assert checker.project_id == '123' + assert checker.project_url == 'https://gitlab.com/owner/repo' + assert checker.mr_iid == '456' + assert checker.target_branch_sha == 'target-sha' + assert checker.source_branch_sha == 'source-sha' + assert checker.gitlab_api_url == 'https://gitlab.com/api/v4' + assert checker.config['mode'] == 'fail' + assert checker.config['ignore_emails'] == ['bot1@example.com', 'bot2@example.com'] + assert checker.config['ignore_logins'] == ['bot1', 'bot2'] + + def test_default_api_url(self, temp_repo): + """Test default GitLab API URL when not specified.""" + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123' + }, clear=True): + checker = GitLabContributorChecker(repo_path=temp_repo) + + assert checker.gitlab_api_url == 'https://gitlab.com/api/v4' + + def test_config_loading(self, temp_repo): + """Test configuration loading from environment variables.""" + with patch.dict(os.environ, { + 'MODE': 'warn', + 'IGNORE_EMAILS': 'ci@example.com, build@example.com ', # Test whitespace handling + 'IGNORE_LOGINS': 'ci-bot, build-bot ' # Test whitespace handling + }): + 
checker = GitLabContributorChecker(repo_path=temp_repo) + + assert checker.config['mode'] == 'warn' + assert checker.config['ignore_emails'] == ['ci@example.com', 'build@example.com'] + assert checker.config['ignore_logins'] == ['ci-bot', 'build-bot'] + + def test_config_defaults(self, temp_repo): + """Test default configuration values.""" + with patch.dict(os.environ, {}, clear=True): + checker = GitLabContributorChecker(repo_path=temp_repo) + + assert checker.config['mode'] == 'warn' + assert 'dependabot[bot]@users.noreply.github.com' in checker.config['ignore_emails'] + assert 'noreply@gitlab.com' in checker.config['ignore_emails'] + assert 'dependabot[bot]' in checker.config['ignore_logins'] + assert 'gitlab-bot' in checker.config['ignore_logins'] + + @patch('requests.post') + def test_post_mr_comment_success(self, mock_post, temp_repo): + """Test successful MR comment posting.""" + mock_post.return_value = Mock(status_code=201) + + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_MERGE_REQUEST_IID': '456', + 'CI_API_V4_URL': 'https://gitlab.example.com/api/v4' + }): + checker = GitLabContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe ', 'Jane Smith '] + + result = checker.post_mr_comment(missing_contributors) + + assert result is True + mock_post.assert_called_once() + + # Verify API call details + call_args = mock_post.call_args + expected_url = 'https://gitlab.example.com/api/v4/projects/123/merge_requests/456/notes' + assert call_args[0][0] == expected_url + assert 'Authorization' in call_args[1]['headers'] + assert call_args[1]['headers']['Authorization'] == 'Bearer test-token' + assert call_args[1]['headers']['Content-Type'] == 'application/json' + assert 'body' in call_args[1]['json'] + + @patch('requests.post') + def test_post_mr_comment_failure(self, mock_post, temp_repo): + """Test MR comment posting failure.""" + mock_post.return_value = Mock(status_code=403, text='Forbidden') + 
mock_post.return_value.raise_for_status.side_effect = Exception('HTTP 403') + + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_MERGE_REQUEST_IID': '456' + }): + checker = GitLabContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe '] + + result = checker.post_mr_comment(missing_contributors) + + assert result is False + + def test_post_mr_comment_missing_env(self, temp_repo): + """Test MR comment posting with missing environment variables.""" + # Missing some required environment variables + with patch.dict(os.environ, {'GITLAB_TOKEN': 'test-token'}, clear=True): + checker = GitLabContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe '] + + result = checker.post_mr_comment(missing_contributors) + + assert result is False + + def test_post_mr_comment_no_requests(self, temp_repo): + """Test MR comment posting when requests is not available.""" + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_MERGE_REQUEST_IID': '456' + }): + # Mock requests not being available + with patch('contrib_checker.gitlab.requests', None): + checker = GitLabContributorChecker(repo_path=temp_repo) + missing_contributors = ['John Doe '] + + result = checker.post_mr_comment(missing_contributors) + + assert result is False + + def test_check_mr_contributors(self, temp_repo): + """Test checking MR contributors.""" + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_MERGE_REQUEST_IID': '456', + 'CI_MERGE_REQUEST_TARGET_BRANCH_SHA': 'target-sha', + 'CI_COMMIT_SHA': 'source-sha' + }): + checker = GitLabContributorChecker(repo_path=temp_repo) + + # Mock the core checker + with patch.object(checker.core_checker, 'check_range_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = checker.check_mr_contributors() + + assert result is True + 
mock_check.assert_called_once_with('target-sha', 'source-sha', 'MR commits') + + def test_check_mr_contributors_with_missing(self, temp_repo): + """Test checking MR contributors with missing contributors.""" + with patch.dict(os.environ, { + 'GITLAB_TOKEN': 'test-token', + 'CI_PROJECT_ID': '123', + 'CI_MERGE_REQUEST_IID': '456', + 'CI_MERGE_REQUEST_TARGET_BRANCH_SHA': 'target-sha', + 'CI_COMMIT_SHA': 'source-sha' + }): + checker = GitLabContributorChecker(repo_path=temp_repo) + + # Mock the core checker to return missing contributors + missing_contributors = ['John Doe '] + with patch.object(checker.core_checker, 'check_range_contributors') as mock_check: + mock_check.return_value = (False, {'missing_overall': missing_contributors}) + + # Mock comment posting + with patch.object(checker, 'post_mr_comment', return_value=True) as mock_comment: + result = checker.check_mr_contributors() + + assert result is False + mock_comment.assert_called_once_with(missing_contributors) + + def test_check_all_contributors(self, temp_repo): + """Test checking all repository contributors.""" + with patch.dict(os.environ, {}): + checker = GitLabContributorChecker(repo_path=temp_repo) + + # Mock the core checker + with patch.object(checker.core_checker, 'check_all_contributors') as mock_check: + mock_check.return_value = (True, {'missing_overall': []}) + + result = checker.check_all_contributors() + + assert result is True + mock_check.assert_called_once() + + def test_mr_mode_detection(self, temp_repo): + """Test MR mode detection based on environment variables.""" + # Test with full MR environment + with patch.dict(os.environ, { + 'CI_MERGE_REQUEST_IID': '456', + 'CI_MERGE_REQUEST_TARGET_BRANCH_SHA': 'target-sha', + 'CI_COMMIT_SHA': 'source-sha' + }): + checker = GitLabContributorChecker(repo_path=temp_repo) + + # This would be used in the main() function to determine mode + mr_mode = bool(checker.mr_iid and checker.target_branch_sha and checker.source_branch_sha) + assert mr_mode is 
True + + # Test without MR environment + with patch.dict(os.environ, {}, clear=True): + checker = GitLabContributorChecker(repo_path=temp_repo) + + mr_mode = bool(checker.mr_iid and checker.target_branch_sha and checker.source_branch_sha) + assert mr_mode is False diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..e63c5ef --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,281 @@ +""" +Integration tests for the contrib-checker package. + +These tests verify that the different components work together correctly. +""" + +import subprocess +import sys +from pathlib import Path +from unittest.mock import patch + +import contrib_checker + + +class TestPackageIntegration: + """Test package-level integration.""" + + def test_package_version(self): + """Test that the package has a version.""" + assert hasattr(contrib_checker, '__version__') + assert contrib_checker.__version__ is not None + + def test_package_exports(self): + """Test that the package exports the expected classes.""" + # Should be able to import main classes + assert contrib_checker.ContributorChecker is not None + assert contrib_checker.GitHubContributorChecker is not None + assert contrib_checker.GitLabContributorChecker is not None + + # Test __all__ contains expected items + expected_exports = [ + 'ContributorChecker', + 'GitHubContributorChecker', + 'GitLabContributorChecker' + ] + + for export in expected_exports: + assert export in contrib_checker.__all__ + + def test_module_execution_github(self): + """Test that GitHub module can be executed.""" + result = subprocess.run([ + sys.executable, '-m', 'contrib_checker.github' + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent + ) + + # Should exit cleanly (either 0 or 1, but not crash) + assert result.returncode in [0, 1] + + # Should not have import errors or syntax errors + assert 'ImportError' not in result.stderr + assert 'SyntaxError' not in result.stderr + assert 
'ModuleNotFoundError' not in result.stderr + + def test_module_execution_gitlab(self): + """Test that GitLab module can be executed.""" + result = subprocess.run([ + sys.executable, '-m', 'contrib_checker.gitlab' + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent + ) + + # Should exit cleanly (either 0 or 1, but not crash) + assert result.returncode in [0, 1] + + # Should not have import errors or syntax errors + assert 'ImportError' not in result.stderr + assert 'SyntaxError' not in result.stderr + assert 'ModuleNotFoundError' not in result.stderr + + def test_cli_execution(self): + """Test that CLI can be executed.""" + result = subprocess.run([ + sys.executable, '-m', 'contrib_checker.cli', '--help' + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent + ) + + assert result.returncode == 0 + assert 'usage:' in result.stdout.lower() or 'usage:' in result.stderr.lower() + + +class TestEndToEndWorkflow: + """Test end-to-end workflows.""" + + def test_github_workflow_no_pr(self, temp_repo, sample_citation_cff): + """Test GitHub workflow when not in PR mode.""" + # Create test repository structure + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + # Mock git log to return contributors that are in citation file + with patch.dict('os.environ', {}, clear=True): + from contrib_checker.github import GitHubContributorChecker + + checker = GitHubContributorChecker(repo_path=temp_repo) + + with patch('subprocess.run') as mock_run: + mock_run.return_value.returncode = 0 + mock_run.return_value.stdout = "John Doe \nJane Smith \n" + + success = checker.check_all_contributors() + + # Should succeed because contributors are in citation file + assert success is True + + def test_gitlab_workflow_no_mr(self, temp_repo, sample_codemeta_json): + """Test GitLab workflow when not in MR mode.""" + # Create test repository structure + codemeta_file = temp_repo / "codemeta.json" + 
codemeta_file.write_text(sample_codemeta_json) + + with patch.dict('os.environ', {}, clear=True): + from contrib_checker.gitlab import GitLabContributorChecker + + checker = GitLabContributorChecker(repo_path=temp_repo) + + with patch('subprocess.run') as mock_run: + mock_run.return_value.returncode = 0 + mock_run.return_value.stdout = "John Doe \nJane Smith \n" + + success = checker.check_all_contributors() + + # Should succeed because contributors are in codemeta file + assert success is True + + def test_cli_workflow_with_both_files(self, temp_repo, sample_citation_cff, sample_codemeta_json): + """Test CLI workflow with both CITATION.cff and codemeta.json.""" + # Create both metadata files + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + codemeta_file = temp_repo / "codemeta.json" + codemeta_file.write_text(sample_codemeta_json) + + from contrib_checker.cli import main + + with patch('subprocess.run') as mock_run: + mock_run.return_value.returncode = 0 + # Include contributors from both files + mock_run.return_value.stdout = ( + "John Doe \n" + "Jane Smith \n" + "Bob Wilson \n" + ) + + result = main([ + '--repo-path', str(temp_repo), + '--mode', 'warn', + '--verbose' + ]) + + assert result == 0 + + def test_missing_contributors_workflow(self, temp_repo, sample_citation_cff): + """Test workflow with missing contributors.""" + # Create citation file + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + from contrib_checker.cli import main + + with patch('subprocess.run') as mock_run: + mock_run.return_value.returncode = 0 + # Include contributors not in citation file + mock_run.return_value.stdout = ( + "John Doe \n" # In citation file + "Missing Person \n" # Not in citation file + ) + + # Test warn mode (should return 0) + result = main([ + '--repo-path', str(temp_repo), + '--mode', 'warn' + ]) + assert result == 0 + + # Test fail mode (should return 1) + result = main([ + 
'--repo-path', str(temp_repo), + '--mode', 'fail' + ]) + assert result == 1 + + def test_ignore_patterns_workflow(self, temp_repo, sample_citation_cff): + """Test workflow with ignore patterns.""" + # Create citation file + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text(sample_citation_cff) + + from contrib_checker.cli import main + + with patch('subprocess.run') as mock_run: + mock_run.return_value.returncode = 0 + # Include contributors that should be ignored + mock_run.return_value.stdout = ( + "John Doe \n" # In citation file + "dependabot[bot] \n" # Should be ignored + "build-bot \n" # Should be ignored by our pattern + ) + + result = main([ + '--repo-path', str(temp_repo), + '--mode', 'fail', + '--ignore-emails', 'build@ci.example.com', + '--ignore-logins', 'build-bot' + ]) + + # Should succeed because ignored contributors are filtered out + assert result == 0 + + +class TestErrorHandling: + """Test error handling scenarios.""" + + def test_invalid_git_repository(self, temp_repo): + """Test behavior with invalid git repository.""" + from contrib_checker.core import ContributorChecker + + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + # Mock git command to fail + with patch('subprocess.run') as mock_run: + mock_run.return_value.returncode = 1 + mock_run.return_value.stdout = "" + + contributors = checker.get_contributors_from_range("abc123", "def456") + + # Should return empty set when git fails + assert len(contributors) == 0 + + def test_malformed_metadata_files(self, temp_repo): + """Test behavior with malformed metadata files.""" + from contrib_checker.core import ContributorChecker + + # Create malformed CITATION.cff + citation_file = temp_repo / "CITATION.cff" + citation_file.write_text("invalid: yaml: content: [") + + # Create malformed codemeta.json + codemeta_file = temp_repo / "codemeta.json" + codemeta_file.write_text('{"invalid": 
json}') + + config = {'mode': 'warn', 'ignore_emails': [], 'ignore_logins': []} + checker = ContributorChecker(repo_path=temp_repo, config=config) + + # Should handle errors gracefully + citation_contributors = checker.parse_citation_cff() + codemeta_contributors = checker.parse_codemeta_json() + + assert len(citation_contributors) == 0 + assert len(codemeta_contributors) == 0 + + def test_network_errors(self, temp_repo): + """Test behavior with network errors.""" + import os + from contrib_checker.github import GitHubContributorChecker + + with patch.dict(os.environ, { + 'GITHUB_TOKEN': 'test-token', + 'GITHUB_REPOSITORY': 'owner/repo', + 'PR_NUMBER': '123' + }): + checker = GitHubContributorChecker(repo_path=temp_repo) + + # Mock requests to raise an exception + with patch('requests.post') as mock_post: + mock_post.side_effect = Exception('Network error') + + result = checker.post_pr_comment(['Missing Person']) + + # Should handle network errors gracefully + assert result is False