From 9eeea01558b367a6ca49e349c34408a6fcd5b239 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:38:36 +0000 Subject: [PATCH] Feat: Add org repo cloning, CLI, and API handling This commit enhances the GitHub repository cloning script with several new features: - **Clone Organization Repositories:** Adds the ability to clone all repositories from a specified GitHub organization. - **Command-Line Interface:** Implements a CLI using `argparse` that allows users to choose between cloning starred repositories or organization repositories. The user can also specify the output directory. - **API Pagination and Rate Limiting:** The script now handles paginated API responses to fetch all repositories and includes logic to wait for rate limit resets, preventing failures when dealing with a large number of repositories. - **Updated Documentation and Tests:** The `README.md` is updated to reflect the new usage, and unit tests are expanded to cover the new functionality. A `requirements.txt` file has also been added. --- README.md | 43 +++++----- github_clone.py | 82 ++++++++++++++----- requirements.txt | 12 +++ tests/test_github_clone.py | 161 ++++++++++++++++++++++++++++++------- 4 files changed, 231 insertions(+), 67 deletions(-) create mode 100644 requirements.txt diff --git a/README.md b/README.md index c2d20a5..25f795a 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,21 @@ - -# GitHub Starred Repository Cloner +# GitHub Repository Cloner ![Python CI](.github/workflows/ci.yml/badge.svg) -This Python script automates the process of cloning all repositories starred by a GitHub user. Simply provide your GitHub username and Personal Access Token (PAT), and it will fetch and clone the repositories into a local directory. +This Python script automates the process of cloning repositories from GitHub. It can clone all repositories starred by a user or all repositories from a specific organization. ## Features -- Automatically fetches starred repositories. -- Clones each repository locally. +- Clone all starred repositories for a user. +- Clone all repositories from a GitHub organization. +- Command-line interface to specify what to clone. +- Handles GitHub API pagination to fetch all repositories. +- Includes rate limiting handling to avoid API request failures. - Skips already cloned repositories to avoid duplication. ## Requirements - Python 3.x -- `requests` library (`pip install requests`) -- `GitPython` library (`pip install gitpython`) +- `requests` library +- `GitPython` library - A GitHub Personal Access Token (PAT) ## Installation & Usage @@ -29,19 +31,27 @@ cd clone-github-starred pip install -r requirements.txt ``` -### 3. Configure Your Credentials -Edit `github_clone.py` and set your GitHub username and PAT: -```python -username = 'Enter Your Username' -token = 'Enter Your PAT' +### 3. Run the Script +Execute the script using the command-line interface. You must provide a GitHub Personal Access Token (PAT) with the `--token` argument. + +#### To clone starred repositories: +Use the `--starred` argument with your GitHub username. +```bash +python github_clone.py --token YOUR_PAT --starred YOUR_USERNAME ``` -### 4. Run the Script -Execute the script to clone starred repositories: +#### To clone repositories from an organization: +Use the `--org` argument with the organization name. ```bash -python github_clone.py +python github_clone.py --token YOUR_PAT --org ORGANIZATION_NAME ``` +#### Optional Arguments: +- `--clone-dir`: Specify a directory to clone the repositories into. Defaults to `repos`. + ```bash + python github_clone.py --token YOUR_PAT --starred YOUR_USERNAME --clone-dir my_starred_repos + ``` + ## Troubleshooting - Ensure your PAT has the necessary permissions to read repository data. - Verify your network connection if cloning fails. @@ -52,6 +62,3 @@ Feel free to submit issues or pull requests to enhance the functionality. ## License This project is licensed under the MIT License. - -``` -``` diff --git a/github_clone.py b/github_clone.py index 49ae214..eacecd2 100644 --- a/github_clone.py +++ b/github_clone.py @@ -1,15 +1,48 @@ import os import requests import git +import argparse +import time + +def get_paged_data(url, headers): + """ + Fetches all pages of data from a paginated GitHub API endpoint. + """ + repos = [] + while url: + response = requests.get(url, headers=headers) + response.raise_for_status() + repos.extend(response.json()) + + if 'next' in response.links: + url = response.links['next']['url'] + else: + url = None + + # Handle rate limiting + if int(response.headers.get('X-RateLimit-Remaining', 1)) == 0: + reset_time = int(response.headers.get('X-RateLimit-Reset', 0)) + sleep_duration = max(0, reset_time - time.time()) + print(f"Rate limit exceeded. Waiting for {sleep_duration:.0f} seconds.") + time.sleep(sleep_duration) + + return repos def get_starred_repos(username, token): """ Fetches the list of starred repositories for a given user. """ url = f'https://api.github.com/users/{username}/starred' - response = requests.get(url, auth=(username, token)) - response.raise_for_status() # Raise an exception for bad status codes - return response.json() + headers = {'Authorization': f'token {token}'} + return get_paged_data(url, headers) + +def get_org_repos(org, token): + """ + Fetches the list of repositories for a given organization. + """ + url = f'https://api.github.com/orgs/{org}/repos' + headers = {'Authorization': f'token {token}'} + return get_paged_data(url, headers) def clone_repo(repo_info, clone_dir): """ @@ -21,8 +54,11 @@ def clone_repo(repo_info, clone_dir): if not os.path.exists(repo_dir): print(f'Cloning {repo_name}...') - git.Repo.clone_from(repo_url, repo_dir) - print(f'Finished cloning {repo_name}') + try: + git.Repo.clone_from(repo_url, repo_dir) + print(f'Finished cloning {repo_name}') + except git.exc.GitCommandError as e: + print(f"Error cloning {repo_name}: {e}") else: print(f'{repo_name} already exists, skipping...') @@ -30,26 +66,36 @@ def main(): """ Main function to clone starred GitHub repositories. """ - # Your GitHub username - username = 'Enter Your Username' - # Example username = 'manupawickramasinghe' + parser = argparse.ArgumentParser(description='Clone GitHub repositories.') + parser.add_argument('--token', required=True, help='GitHub Personal Access Token.') - # Your GitHub personal access token - token = 'Enter Your PAT' - # Example token = '123123123133' + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--starred', metavar='USERNAME', help='Clone starred repositories for a user.') + group.add_argument('--org', metavar='ORG_NAME', help='Clone repositories from an organization.') - # Directory to clone repos into - clone_dir = 'starred_repos' + parser.add_argument('--clone-dir', default='repos', help='Directory to clone repositories into.') + + args = parser.parse_args() # Create the directory if it doesn't exist - if not os.path.exists(clone_dir): - os.makedirs(clone_dir) + if not os.path.exists(args.clone_dir): + os.makedirs(args.clone_dir) try: - repos = get_starred_repos(username, token) + if args.starred: + print(f"Fetching starred repositories for {args.starred}...") + repos = get_starred_repos(args.starred, args.token) + elif args.org: + print(f"Fetching repositories for organization {args.org}...") + repos = get_org_repos(args.org, args.token) + + print(f"Found {len(repos)} repositories to clone.") + for repo in repos: - clone_repo(repo, clone_dir) - print('All repositories have been cloned.') + clone_repo(repo, args.clone_dir) + + print('All repositories have been processed.') + except requests.exceptions.RequestException as e: print(f"Error fetching repositories: {e}") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..06835f8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +certifi==2025.8.3 +charset-normalizer==3.4.3 +gitdb==4.0.12 +GitPython==3.1.45 +greenlet==3.2.3 +idna==3.10 +playwright==1.54.0 +pyee==13.0.0 +requests==2.32.5 +smmap==5.0.2 +typing_extensions==4.14.1 +urllib3==2.5.0 diff --git a/tests/test_github_clone.py b/tests/test_github_clone.py index c1626dd..8706480 100644 --- a/tests/test_github_clone.py +++ b/tests/test_github_clone.py @@ -1,67 +1,166 @@ import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, call import os import sys +import argparse +import git # Add the root directory to the Python path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from github_clone import get_starred_repos, clone_repo +from github_clone import get_paged_data, get_starred_repos, get_org_repos, clone_repo, main class TestGitHubClone(unittest.TestCase): @patch('github_clone.requests.get') - def test_get_starred_repos_success(self, mock_get): - # Mock the API response + def test_get_paged_data_single_page(self, mock_get): + # Mock the API response for a single page mock_response = MagicMock() mock_response.json.return_value = [{'name': 'repo1'}, {'name': 'repo2'}] mock_response.raise_for_status = MagicMock() + mock_response.links = {} + mock_response.headers = {'X-RateLimit-Remaining': '5000', 'X-RateLimit-Reset': '1678886400'} mock_get.return_value = mock_response - repos = get_starred_repos('testuser', 'testtoken') + repos = get_paged_data('http://example.com/repos', {'Authorization': 'token test'}) self.assertEqual(len(repos), 2) - self.assertEqual(repos[0]['name'], 'repo1') - mock_get.assert_called_with('https://api.github.com/users/testuser/starred', auth=('testuser', 'testtoken')) + mock_get.assert_called_once_with('http://example.com/repos', headers={'Authorization': 'token test'}) @patch('github_clone.requests.get') - def test_get_starred_repos_failure(self, mock_get): - # Mock a failed API response - mock_response = MagicMock() - mock_response.raise_for_status.side_effect = Exception("API Error") - mock_get.return_value = mock_response + def test_get_paged_data_multiple_pages(self, mock_get): + # Mock the API response for multiple pages + mock_response1 = MagicMock() + mock_response1.json.return_value = [{'name': 'repo1'}] + mock_response1.raise_for_status = MagicMock() + mock_response1.links = {'next': {'url': 'http://example.com/repos?page=2'}} + mock_response1.headers = {'X-RateLimit-Remaining': '5000', 'X-RateLimit-Reset': '1678886400'} + + mock_response2 = MagicMock() + mock_response2.json.return_value = [{'name': 'repo2'}] + mock_response2.raise_for_status = MagicMock() + mock_response2.links = {} + mock_response2.headers = {'X-RateLimit-Remaining': '4999', 'X-RateLimit-Reset': '1678886400'} + + mock_get.side_effect = [mock_response1, mock_response2] + + repos = get_paged_data('http://example.com/repos', {'Authorization': 'token test'}) + self.assertEqual(len(repos), 2) + self.assertEqual(mock_get.call_count, 2) + mock_get.assert_has_calls([ + call('http://example.com/repos', headers={'Authorization': 'token test'}), + call('http://example.com/repos?page=2', headers={'Authorization': 'token test'}) + ]) - with self.assertRaises(Exception): - get_starred_repos('testuser', 'testtoken') + @patch('github_clone.time.sleep') + @patch('github_clone.requests.get') + def test_get_paged_data_rate_limiting(self, mock_get, mock_sleep): + # Mock the API response for rate limiting + # The time.time() call will be mocked to a fixed value. + current_time = 1678886390.0 + reset_time = current_time + 10 + + mock_response1 = MagicMock() + mock_response1.json.return_value = [{'name': 'repo1'}] + mock_response1.raise_for_status = MagicMock() + mock_response1.links = {'next': {'url': 'http://example.com/repos?page=2'}} + mock_response1.headers = {'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': str(int(reset_time))} + + mock_response2 = MagicMock() + mock_response2.json.return_value = [{'name': 'repo2'}] + mock_response2.raise_for_status = MagicMock() + mock_response2.links = {} + mock_response2.headers = {'X-RateLimit-Remaining': '5000', 'X-RateLimit-Reset': str(int(reset_time) + 3600)} + + mock_get.side_effect = [mock_response1, mock_response2] + + with patch('time.time', return_value=current_time): + get_paged_data('http://example.com/repos', {'Authorization': 'token test'}) + + mock_sleep.assert_called_once() + # The sleep duration should be slightly more than 10 seconds + self.assertAlmostEqual(mock_sleep.call_args[0][0], 10, delta=1) + + @patch('github_clone.get_paged_data') + def test_get_starred_repos(self, mock_get_paged_data): + get_starred_repos('testuser', 'testtoken') + mock_get_paged_data.assert_called_with( + 'https://api.github.com/users/testuser/starred', + {'Authorization': 'token testtoken'} + ) + + @patch('github_clone.get_paged_data') + def test_get_org_repos(self, mock_get_paged_data): + get_org_repos('testorg', 'testtoken') + mock_get_paged_data.assert_called_with( + 'https://api.github.com/orgs/testorg/repos', + {'Authorization': 'token testtoken'} + ) @patch('github_clone.git.Repo.clone_from') @patch('github_clone.os.path.exists') def test_clone_repo_new(self, mock_exists, mock_clone_from): - # Mock that the repo does not exist mock_exists.return_value = False - repo_info = {'name': 'new_repo', 'clone_url': 'http://example.com/new_repo.git'} - clone_dir = 'test_dir' - - clone_repo(repo_info, clone_dir) - - repo_dir = os.path.join(clone_dir, repo_info['name']) - mock_exists.assert_called_with(repo_dir) - mock_clone_from.assert_called_with(repo_info['clone_url'], repo_dir) + clone_repo(repo_info, 'test_dir') + mock_clone_from.assert_called_with('http://example.com/new_repo.git', os.path.join('test_dir', 'new_repo')) @patch('github_clone.git.Repo.clone_from') @patch('github_clone.os.path.exists') def test_clone_repo_exists(self, mock_exists, mock_clone_from): - # Mock that the repo already exists mock_exists.return_value = True - repo_info = {'name': 'existing_repo', 'clone_url': 'http://example.com/existing_repo.git'} - clone_dir = 'test_dir' - - clone_repo(repo_info, clone_dir) - - repo_dir = os.path.join(clone_dir, repo_info['name']) - mock_exists.assert_called_with(repo_dir) + clone_repo(repo_info, 'test_dir') mock_clone_from.assert_not_called() + @patch('github_clone.git.Repo.clone_from') + @patch('github_clone.os.path.exists') + def test_clone_repo_error(self, mock_exists, mock_clone_from): + mock_exists.return_value = False + mock_clone_from.side_effect = git.exc.GitCommandError('clone', 'error') + repo_info = {'name': 'error_repo', 'clone_url': 'http://example.com/error_repo.git'} + with patch('builtins.print') as mock_print: + clone_repo(repo_info, 'test_dir') + self.assertIn("Error cloning error_repo", mock_print.call_args_list[1][0][0]) + + @patch('github_clone.argparse.ArgumentParser.parse_args') + @patch('github_clone.get_starred_repos') + @patch('github_clone.clone_repo') + @patch('github_clone.os.makedirs') + @patch('github_clone.os.path.exists', return_value=False) + def test_main_starred(self, mock_exists, mock_makedirs, mock_clone_repo, mock_get_starred_repos, mock_parse_args): + mock_parse_args.return_value = argparse.Namespace( + token='testtoken', + starred='testuser', + org=None, + clone_dir='test_clone_dir' + ) + mock_get_starred_repos.return_value = [{'name': 'repo1'}, {'name': 'repo2'}] + + main() + + mock_get_starred_repos.assert_called_with('testuser', 'testtoken') + self.assertEqual(mock_clone_repo.call_count, 2) + mock_makedirs.assert_called_with('test_clone_dir') + + @patch('github_clone.argparse.ArgumentParser.parse_args') + @patch('github_clone.get_org_repos') + @patch('github_clone.clone_repo') + @patch('github_clone.os.makedirs') + @patch('github_clone.os.path.exists', return_value=False) + def test_main_org(self, mock_exists, mock_makedirs, mock_clone_repo, mock_get_org_repos, mock_parse_args): + mock_parse_args.return_value = argparse.Namespace( + token='testtoken', + starred=None, + org='testorg', + clone_dir='test_clone_dir' + ) + mock_get_org_repos.return_value = [{'name': 'repo1'}, {'name': 'repo2'}] + + main() + + mock_get_org_repos.assert_called_with('testorg', 'testtoken') + self.assertEqual(mock_clone_repo.call_count, 2) + mock_makedirs.assert_called_with('test_clone_dir') + if __name__ == '__main__': unittest.main()