Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 25 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@

# GitHub Starred Repository Cloner
# GitHub Repository Cloner

![Python CI](.github/workflows/ci.yml/badge.svg)

This Python script automates the process of cloning all repositories starred by a GitHub user. Simply provide your GitHub username and Personal Access Token (PAT), and it will fetch and clone the repositories into a local directory.
This Python script automates the process of cloning repositories from GitHub. It can clone all repositories starred by a user or all repositories from a specific organization.

## Features
- Automatically fetches starred repositories.
- Clones each repository locally.
- Clone all starred repositories for a user.
- Clone all repositories from a GitHub organization.
- Command-line interface to specify what to clone.
- Handles GitHub API pagination to fetch all repositories.
- Handles GitHub API rate limits by waiting for the limit to reset before requesting the next page.
- Skips already cloned repositories to avoid duplication.

## Requirements
- Python 3.x
- `requests` library (`pip install requests`)
- `GitPython` library (`pip install gitpython`)
- `requests` library
- `GitPython` library
- A GitHub Personal Access Token (PAT)

## Installation & Usage
Expand All @@ -29,19 +31,27 @@ cd clone-github-starred
pip install -r requirements.txt
```

### 3. Configure Your Credentials
Edit `github_clone.py` and set your GitHub username and PAT:
```python
username = 'Enter Your Username'
token = 'Enter Your PAT'
### 3. Run the Script
Execute the script using the command-line interface. You must provide a GitHub Personal Access Token (PAT) with the `--token` argument.

#### To clone starred repositories:
Use the `--starred` argument with your GitHub username.
```bash
python github_clone.py --token YOUR_PAT --starred YOUR_USERNAME
```

### 4. Run the Script
Execute the script to clone starred repositories:
#### To clone repositories from an organization:
Use the `--org` argument with the organization name.
```bash
python github_clone.py
python github_clone.py --token YOUR_PAT --org ORGANIZATION_NAME
```

#### Optional Arguments:
- `--clone-dir`: Specify a directory to clone the repositories into. Defaults to `repos`.
```bash
python github_clone.py --token YOUR_PAT --starred YOUR_USERNAME --clone-dir my_starred_repos
```

## Troubleshooting
- Ensure your PAT has the necessary permissions to read repository data.
- Verify your network connection if cloning fails.
Expand All @@ -52,6 +62,3 @@ Feel free to submit issues or pull requests to enhance the functionality.

## License
This project is licensed under the MIT License.

```
```
82 changes: 64 additions & 18 deletions github_clone.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,48 @@
import os
import requests
import git
import argparse
import time

def get_paged_data(url, headers):
    """
    Fetches all pages of data from a paginated GitHub API endpoint.

    Starting at ``url``, follows the ``next`` link of each response until
    there is none left and concatenates the JSON payload of every page.

    Args:
        url: URL of the first page to request.
        headers: HTTP headers sent with every request.

    Returns:
        A list holding the items of all pages, in the order received.

    Raises:
        requests.exceptions.HTTPError: propagated from raise_for_status()
            when a page comes back with an error status.
    """
    repos = []
    while url:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        repos.extend(response.json())

        # No 'next' entry in the parsed Link header means this was the
        # last page, which ends the loop.
        next_link = response.links.get('next')
        url = next_link['url'] if next_link else None

        # Handle rate limiting. Only wait when another page still has to be
        # fetched: sleeping after the final page would block the caller
        # until the reset time without any request left to protect.
        if url and int(response.headers.get('X-RateLimit-Remaining', 1)) == 0:
            reset_time = int(response.headers.get('X-RateLimit-Reset', 0))
            # Clamp at zero in case the reset time is already in the past.
            sleep_duration = max(0, reset_time - time.time())
            print(f"Rate limit exceeded. Waiting for {sleep_duration:.0f} seconds.")
            time.sleep(sleep_duration)

    return repos

def get_starred_repos(username, token):
    """
    Fetches the list of starred repositories for a given user.

    Args:
        username: GitHub user whose starred repositories are listed.
        token: GitHub Personal Access Token, sent in the Authorization
            header.

    Returns:
        The list produced by get_paged_data for the user's starred
        endpoint, i.e. the items of every page rather than only the first.
    """
    url = f'https://api.github.com/users/{username}/starred'
    headers = {'Authorization': f'token {token}'}
    return get_paged_data(url, headers)

def get_org_repos(org, token):
    """
    Returns the repositories of the GitHub organization ``org``.

    The personal access token ``token`` is sent in the Authorization
    header; fetching the pages is delegated to get_paged_data.
    """
    auth_headers = dict(Authorization=f'token {token}')
    return get_paged_data(f'https://api.github.com/orgs/{org}/repos', auth_headers)

def clone_repo(repo_info, clone_dir):
"""
Expand All @@ -21,35 +54,48 @@ def clone_repo(repo_info, clone_dir):

if not os.path.exists(repo_dir):
print(f'Cloning {repo_name}...')
git.Repo.clone_from(repo_url, repo_dir)
print(f'Finished cloning {repo_name}')
try:
git.Repo.clone_from(repo_url, repo_dir)
print(f'Finished cloning {repo_name}')
except git.exc.GitCommandError as e:
print(f"Error cloning {repo_name}: {e}")
else:
print(f'{repo_name} already exists, skipping...')

def main():
"""
Main function to clone starred GitHub repositories.
"""
# Your GitHub username
username = 'Enter Your Username'
# Example username = 'manupawickramasinghe'
parser = argparse.ArgumentParser(description='Clone GitHub repositories.')
parser.add_argument('--token', required=True, help='GitHub Personal Access Token.')

# Your GitHub personal access token
token = 'Enter Your PAT'
# Example token = '123123123133'
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--starred', metavar='USERNAME', help='Clone starred repositories for a user.')
group.add_argument('--org', metavar='ORG_NAME', help='Clone repositories from an organization.')

# Directory to clone repos into
clone_dir = 'starred_repos'
parser.add_argument('--clone-dir', default='repos', help='Directory to clone repositories into.')

args = parser.parse_args()

# Create the directory if it doesn't exist
if not os.path.exists(clone_dir):
os.makedirs(clone_dir)
if not os.path.exists(args.clone_dir):
os.makedirs(args.clone_dir)

try:
repos = get_starred_repos(username, token)
if args.starred:
print(f"Fetching starred repositories for {args.starred}...")
repos = get_starred_repos(args.starred, args.token)
elif args.org:
print(f"Fetching repositories for organization {args.org}...")
repos = get_org_repos(args.org, args.token)

print(f"Found {len(repos)} repositories to clone.")

for repo in repos:
clone_repo(repo, clone_dir)
print('All repositories have been cloned.')
clone_repo(repo, args.clone_dir)

print('All repositories have been processed.')

except requests.exceptions.RequestException as e:
print(f"Error fetching repositories: {e}")

Expand Down
12 changes: 12 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
certifi==2025.8.3
charset-normalizer==3.4.3
gitdb==4.0.12
GitPython==3.1.45
greenlet==3.2.3
idna==3.10
playwright==1.54.0
pyee==13.0.0
requests==2.32.5
smmap==5.0.2
typing_extensions==4.14.1
urllib3==2.5.0
161 changes: 130 additions & 31 deletions tests/test_github_clone.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,166 @@
import unittest
from unittest.mock import patch, MagicMock
from unittest.mock import patch, MagicMock, call
import os
import sys
import argparse
import git

# Add the root directory to the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from github_clone import get_starred_repos, clone_repo
from github_clone import get_paged_data, get_starred_repos, get_org_repos, clone_repo, main

class TestGitHubClone(unittest.TestCase):

@patch('github_clone.requests.get')
def test_get_starred_repos_success(self, mock_get):
# Mock the API response
def test_get_paged_data_single_page(self, mock_get):
# Mock the API response for a single page
mock_response = MagicMock()
mock_response.json.return_value = [{'name': 'repo1'}, {'name': 'repo2'}]
mock_response.raise_for_status = MagicMock()
mock_response.links = {}
mock_response.headers = {'X-RateLimit-Remaining': '5000', 'X-RateLimit-Reset': '1678886400'}
mock_get.return_value = mock_response

repos = get_starred_repos('testuser', 'testtoken')
repos = get_paged_data('http://example.com/repos', {'Authorization': 'token test'})
self.assertEqual(len(repos), 2)
self.assertEqual(repos[0]['name'], 'repo1')
mock_get.assert_called_with('https://api.github.com/users/testuser/starred', auth=('testuser', 'testtoken'))
mock_get.assert_called_once_with('http://example.com/repos', headers={'Authorization': 'token test'})

@patch('github_clone.requests.get')
def test_get_starred_repos_failure(self, mock_get):
# Mock a failed API response
mock_response = MagicMock()
mock_response.raise_for_status.side_effect = Exception("API Error")
mock_get.return_value = mock_response
def test_get_paged_data_multiple_pages(self, mock_get):
# Mock the API response for multiple pages
mock_response1 = MagicMock()
mock_response1.json.return_value = [{'name': 'repo1'}]
mock_response1.raise_for_status = MagicMock()
mock_response1.links = {'next': {'url': 'http://example.com/repos?page=2'}}
mock_response1.headers = {'X-RateLimit-Remaining': '5000', 'X-RateLimit-Reset': '1678886400'}

mock_response2 = MagicMock()
mock_response2.json.return_value = [{'name': 'repo2'}]
mock_response2.raise_for_status = MagicMock()
mock_response2.links = {}
mock_response2.headers = {'X-RateLimit-Remaining': '4999', 'X-RateLimit-Reset': '1678886400'}

mock_get.side_effect = [mock_response1, mock_response2]

repos = get_paged_data('http://example.com/repos', {'Authorization': 'token test'})
self.assertEqual(len(repos), 2)
self.assertEqual(mock_get.call_count, 2)
mock_get.assert_has_calls([
call('http://example.com/repos', headers={'Authorization': 'token test'}),
call('http://example.com/repos?page=2', headers={'Authorization': 'token test'})
])

with self.assertRaises(Exception):
get_starred_repos('testuser', 'testtoken')
@patch('github_clone.time.sleep')
@patch('github_clone.requests.get')
def test_get_paged_data_rate_limiting(self, mock_get, mock_sleep):
# Mock the API response for rate limiting
# The time.time() call will be mocked to a fixed value.
current_time = 1678886390.0
reset_time = current_time + 10

mock_response1 = MagicMock()
mock_response1.json.return_value = [{'name': 'repo1'}]
mock_response1.raise_for_status = MagicMock()
mock_response1.links = {'next': {'url': 'http://example.com/repos?page=2'}}
mock_response1.headers = {'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': str(int(reset_time))}

mock_response2 = MagicMock()
mock_response2.json.return_value = [{'name': 'repo2'}]
mock_response2.raise_for_status = MagicMock()
mock_response2.links = {}
mock_response2.headers = {'X-RateLimit-Remaining': '5000', 'X-RateLimit-Reset': str(int(reset_time) + 3600)}

mock_get.side_effect = [mock_response1, mock_response2]

with patch('time.time', return_value=current_time):
get_paged_data('http://example.com/repos', {'Authorization': 'token test'})

mock_sleep.assert_called_once()
# The sleep duration should be slightly more than 10 seconds
self.assertAlmostEqual(mock_sleep.call_args[0][0], 10, delta=1)

@patch('github_clone.get_paged_data')
def test_get_starred_repos(self, mock_get_paged_data):
get_starred_repos('testuser', 'testtoken')
mock_get_paged_data.assert_called_with(
'https://api.github.com/users/testuser/starred',
{'Authorization': 'token testtoken'}
)

@patch('github_clone.get_paged_data')
def test_get_org_repos(self, mock_get_paged_data):
get_org_repos('testorg', 'testtoken')
mock_get_paged_data.assert_called_with(
'https://api.github.com/orgs/testorg/repos',
{'Authorization': 'token testtoken'}
)

@patch('github_clone.git.Repo.clone_from')
@patch('github_clone.os.path.exists')
def test_clone_repo_new(self, mock_exists, mock_clone_from):
# Mock that the repo does not exist
mock_exists.return_value = False

repo_info = {'name': 'new_repo', 'clone_url': 'http://example.com/new_repo.git'}
clone_dir = 'test_dir'

clone_repo(repo_info, clone_dir)

repo_dir = os.path.join(clone_dir, repo_info['name'])
mock_exists.assert_called_with(repo_dir)
mock_clone_from.assert_called_with(repo_info['clone_url'], repo_dir)
clone_repo(repo_info, 'test_dir')
mock_clone_from.assert_called_with('http://example.com/new_repo.git', os.path.join('test_dir', 'new_repo'))

@patch('github_clone.git.Repo.clone_from')
@patch('github_clone.os.path.exists')
def test_clone_repo_exists(self, mock_exists, mock_clone_from):
# Mock that the repo already exists
mock_exists.return_value = True

repo_info = {'name': 'existing_repo', 'clone_url': 'http://example.com/existing_repo.git'}
clone_dir = 'test_dir'

clone_repo(repo_info, clone_dir)

repo_dir = os.path.join(clone_dir, repo_info['name'])
mock_exists.assert_called_with(repo_dir)
clone_repo(repo_info, 'test_dir')
mock_clone_from.assert_not_called()

@patch('github_clone.git.Repo.clone_from')
@patch('github_clone.os.path.exists')
def test_clone_repo_error(self, mock_exists, mock_clone_from):
mock_exists.return_value = False
mock_clone_from.side_effect = git.exc.GitCommandError('clone', 'error')
repo_info = {'name': 'error_repo', 'clone_url': 'http://example.com/error_repo.git'}
with patch('builtins.print') as mock_print:
clone_repo(repo_info, 'test_dir')
self.assertIn("Error cloning error_repo", mock_print.call_args_list[1][0][0])

@patch('github_clone.argparse.ArgumentParser.parse_args')
@patch('github_clone.get_starred_repos')
@patch('github_clone.clone_repo')
@patch('github_clone.os.makedirs')
@patch('github_clone.os.path.exists', return_value=False)
def test_main_starred(self, mock_exists, mock_makedirs, mock_clone_repo, mock_get_starred_repos, mock_parse_args):
mock_parse_args.return_value = argparse.Namespace(
token='testtoken',
starred='testuser',
org=None,
clone_dir='test_clone_dir'
)
mock_get_starred_repos.return_value = [{'name': 'repo1'}, {'name': 'repo2'}]

main()

mock_get_starred_repos.assert_called_with('testuser', 'testtoken')
self.assertEqual(mock_clone_repo.call_count, 2)
mock_makedirs.assert_called_with('test_clone_dir')

@patch('github_clone.argparse.ArgumentParser.parse_args')
@patch('github_clone.get_org_repos')
@patch('github_clone.clone_repo')
@patch('github_clone.os.makedirs')
@patch('github_clone.os.path.exists', return_value=False)
def test_main_org(self, mock_exists, mock_makedirs, mock_clone_repo, mock_get_org_repos, mock_parse_args):
mock_parse_args.return_value = argparse.Namespace(
token='testtoken',
starred=None,
org='testorg',
clone_dir='test_clone_dir'
)
mock_get_org_repos.return_value = [{'name': 'repo1'}, {'name': 'repo2'}]

main()

mock_get_org_repos.assert_called_with('testorg', 'testtoken')
self.assertEqual(mock_clone_repo.call_count, 2)
mock_makedirs.assert_called_with('test_clone_dir')

if __name__ == '__main__':
unittest.main()