-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_org_backup.py
More file actions
232 lines (198 loc) · 8.64 KB
/
github_org_backup.py
File metadata and controls
232 lines (198 loc) · 8.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python3
"""
github_org_backup.py
Clone or update every repository in a GitHub organization.
Supports passing a custom SSH key, logs to a timestamped file,
and rotates old logs (keeps N days).
USAGE:
python github_org_backup.py --org ORG_NAME [options]
EXAMPLES:
python github_org_backup.py --org my-org --ssh-key ~/.ssh/id_backup --log-dir /var/log/github-backups --retention-days 30
"""
from __future__ import annotations

import argparse
import json
import os
import re
import shlex
import subprocess
import sys
import time
import urllib.error
import urllib.request
from datetime import datetime, timedelta
from typing import Dict, Iterable, List, Optional, Tuple
API = "https://api.github.com"
GITHUB_API_VERSION = "2022-11-28"
PER_PAGE = 100
# --------------------------- Logging ---------------------------
# Module-global handle to the currently open log file.
# Set by init_logger(); read by log(). None until logging is initialized.
log_file = None
def init_logger(log_dir: str, org: str) -> str:
    """Create *log_dir* and open a timestamped, line-buffered log file.

    Sets the module-global ``log_file`` that :func:`log` mirrors output to.

    Args:
        log_dir: Directory in which log files live (created if missing).
        org: Organization name, embedded in the log file name.

    Returns:
        Path of the newly opened log file.
    """
    os.makedirs(log_dir, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    path = os.path.join(log_dir, f"{org}-backup-{ts}.log")
    global log_file
    # Line-buffered so entries survive a crash; explicit UTF-8 so non-ASCII
    # repo names don't raise UnicodeEncodeError under a C/POSIX locale.
    log_file = open(path, "a", buffering=1, encoding="utf-8")
    return path
def log(msg: str, stream=sys.stdout) -> None:
    """Echo *msg*, prefixed with a timestamp, to *stream* and to the log file.

    The copy to the global ``log_file`` is skipped until init_logger() has
    opened one.
    """
    stamped = f"[{datetime.now():%Y-%m-%d %H:%M:%S}] {msg}"
    print(stamped, file=stream)
    if log_file:
        print(stamped, file=log_file)
def rotate_logs(log_dir: str, org: str, retention_days: int) -> None:
    """Prune log files older than *retention_days* days.

    Only files named ``{org}-backup-YYYYMMDD-HHMMSS.log`` are candidates;
    age is judged by modification time rather than the name, which is more
    robust. A missing *log_dir* is silently ignored (nothing to rotate).
    """
    try:
        oldest_allowed = time.time() - retention_days * 86400
        name_re = re.compile(re.escape(f"{org}-backup-") + r"\d{8}-\d{6}\.log$")
        deleted = 0
        for item in os.scandir(log_dir):
            if not item.is_file():
                continue
            if name_re.match(item.name) is None:
                continue
            try:
                if item.stat().st_mtime < oldest_allowed:
                    os.remove(item.path)
                    deleted += 1
            except Exception as exc:
                log(f"[warn] Failed to inspect/remove {item.name}: {exc}", stream=sys.stderr)
        if deleted:
            log(f"[logs] Rotated: removed {deleted} old log file(s)")
    except FileNotFoundError:
        # log_dir may not exist yet; nothing to rotate
        return
# --------------------------- Utilities ---------------------------
def run(cmd: List[str], cwd: Optional[str] = None, check: bool = True, extra_env: Optional[dict] = None) -> subprocess.CompletedProcess:
    """Run *cmd* with the current environment plus *extra_env* overrides.

    Args:
        cmd: Command and arguments in list form (never run through a shell).
        cwd: Working directory for the child, or None for the current one.
        check: When True, a non-zero exit raises CalledProcessError.
        extra_env: Variables merged over a copy of os.environ.

    Returns:
        The CompletedProcess from subprocess.run.

    Raises:
        subprocess.CalledProcessError: If *check* is true and the command fails.
    """
    env = os.environ.copy()
    if extra_env:
        env.update(extra_env)
    # subprocess.run already implements the check/raise behavior; no need to
    # re-raise CalledProcessError by hand.
    return subprocess.run(cmd, cwd=cwd, check=check, text=True, env=env)
def already_cloned(repo_dir: str) -> bool:
    """Return True when *repo_dir* already holds a git checkout (has ``.git``)."""
    git_meta = os.path.join(repo_dir, ".git")
    return os.path.isdir(git_meta)
def parse_link_header(link_header: str) -> Dict[str, str]:
    """Parse an RFC 5988 ``Link`` response header into a ``{rel: url}`` map.

    Entries that don't match the ``<url>; rel="name"`` shape are ignored;
    an empty or missing header yields an empty dict.
    """
    if not link_header:
        return {}
    rels: Dict[str, str] = {}
    for chunk in link_header.split(","):
        found = re.match(r'<([^>]+)>;\s*rel="([^"]+)"', chunk.strip())
        if found is not None:
            rels[found.group(2)] = found.group(1)
    return rels
def gh_request(url: str, token: Optional[str]) -> Tuple[List[dict], Optional[str]]:
    """GET one GitHub API page; return (parsed JSON body, next-page URL or None).

    Sleeps through primary rate limiting (per the X-RateLimit-Reset header)
    and retries transient network errors a bounded number of times.

    Args:
        url: Full API URL (including query string) to fetch.
        token: Optional bearer token for authenticated requests.

    Raises:
        urllib.error.HTTPError: For HTTP failures other than rate limiting.
        urllib.error.URLError: After exhausting network-error retries.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": GITHUB_API_VERSION,
        "User-Agent": "org-backup-script"
    }
    if token:
        headers["Authorization"] = f"Bearer {token}"
    req = urllib.request.Request(url, headers=headers)
    network_retries = 5  # was unbounded: a dead network looped forever
    while True:
        try:
            with urllib.request.urlopen(req) as resp:
                data = json.load(resp)
                links = parse_link_header(resp.headers.get("Link", ""))
                return data, links.get("next")
        except urllib.error.HTTPError as e:
            # Primary rate limit: wait until the advertised reset, then retry.
            if e.code == 403 and e.headers.get("X-RateLimit-Remaining") == "0":
                reset_ts = int(e.headers.get("X-RateLimit-Reset", "0"))
                sleep_for = max(0, reset_ts - int(time.time()) + 2)
                log(f"[rate-limit] Sleeping {sleep_for}s...", stream=sys.stderr)
                time.sleep(sleep_for)
                continue
            raise
        except urllib.error.URLError as e:
            if network_retries <= 0:
                raise
            network_retries -= 1
            log(f"[warn] Network error: {e}. Retrying in 3s...", stream=sys.stderr)
            time.sleep(3)
            continue
# ----------------------- Git operations -------------------------
def build_clone_url(repo: dict, mode: str) -> str:
    """Select the SSH or HTTPS clone URL from a repo API object per *mode*."""
    if mode == "ssh":
        return repo["ssh_url"]
    return repo["clone_url"]
def git_env(ssh_key: Optional[str]) -> dict:
    """Return env overrides that pin git's SSH invocation to *ssh_key*.

    The key path is shell-quoted: GIT_SSH_COMMAND is parsed by a shell, so
    an unquoted path containing spaces would be split into separate words.

    Args:
        ssh_key: Path to the private key, or None to use the default
            agent/config (returns an empty dict).
    """
    if ssh_key:
        return {"GIT_SSH_COMMAND": f"ssh -i {shlex.quote(ssh_key)} -o IdentitiesOnly=yes"}
    return {}
def clone_repo(repo: dict, dest_dir: str, mode: str, ssh_key: Optional[str], update_submodules: bool) -> None:
    """Clone *repo* into *dest_dir*, recursing into submodules when asked."""
    name = repo["name"]
    log(f"[clone] {name}")
    clone_cmd = ["git", "clone"]
    if update_submodules:
        clone_cmd.append("--recursive")
    clone_cmd += [build_clone_url(repo, mode), name]
    run(clone_cmd, cwd=dest_dir, extra_env=git_env(ssh_key))
def update_repo(repo_dir: str, ssh_key: Optional[str], update_submodules: bool) -> None:
    """Refresh an existing clone: fetch, fast-forward if possible, submodules.

    A pull that cannot fast-forward (e.g. diverged local branch) is
    tolerated — the refs are re-fetched instead, which is what a backup
    actually needs.
    """
    log(f"[update] {os.path.basename(repo_dir)}")
    overrides = git_env(ssh_key)
    run(["git", "fetch", "--all", "--prune"], cwd=repo_dir, extra_env=overrides)
    try:
        run(["git", "pull", "--ff-only"], cwd=repo_dir, extra_env=overrides)
    except subprocess.CalledProcessError:
        # Best-effort: keep the fetched refs even when the worktree can't advance.
        run(["git", "fetch", "--all", "--prune"], cwd=repo_dir, extra_env=overrides)
    if update_submodules:
        run(["git", "submodule", "update", "--init", "--recursive"], cwd=repo_dir, extra_env=overrides)
def clone_or_update_repo(repo: dict, base_dir: str, mode: str, ssh_key: Optional[str], update_submodules: bool) -> None:
    """Update the repo if a clone already exists under *base_dir*; else clone it."""
    target = os.path.join(base_dir, repo["name"])
    if already_cloned(target):
        update_repo(target, ssh_key, update_submodules)
        return
    clone_repo(repo, base_dir, mode, ssh_key, update_submodules)
# ----------------------- Repo listing ---------------------------
def list_org_repos(org: str, token: Optional[str], repo_type: str, include_forks: bool, include_archived: bool) -> Iterable[dict]:
    """Yield repository objects for *org*, following API pagination.

    Forks and archived repositories are skipped unless explicitly included.
    """
    page_url: Optional[str] = f"{API}/orgs/{org}/repos?type={repo_type}&per_page={PER_PAGE}"
    while page_url:
        page, page_url = gh_request(page_url, token)
        for repo in page:
            if repo.get("fork") and not include_forks:
                continue
            if repo.get("archived") and not include_archived:
                continue
            yield repo
# ------------------------ Main program --------------------------
def main():
    """Parse CLI arguments, rotate/open logs, then clone or update every org repo.

    One failing repository is logged and skipped so a single bad clone
    cannot abort the whole backup run.
    """
    p = argparse.ArgumentParser(description="Clone or update all repos in a GitHub organization.")
    p.add_argument("--org", required=True)
    p.add_argument("--token")
    p.add_argument("--mode", choices=["ssh", "https"], default="ssh")
    p.add_argument("--ssh-key", help="Path to SSH private key (only used in ssh mode)")
    p.add_argument("--type", default="all", choices=["all", "public", "private", "member"])
    p.add_argument("--include-forks", action="store_true")
    p.add_argument("--include-archived", action="store_true")
    p.add_argument("--dir", help="Target directory (default: ./ORG)")
    p.add_argument("--update-submodules", action="store_true")
    p.add_argument("--log-dir", default="./logs", help="Directory to store logs")
    p.add_argument("--retention-days", type=int, default=30, help="Days to keep logs (default: 30)")
    args = p.parse_args()
    # Expand ~ so a quoted --ssh-key "~/.ssh/key" works even when the shell
    # did not expand it.
    ssh_key = os.path.expanduser(args.ssh_key) if args.ssh_key else None
    # Rotate old logs first (in case the disk is tight)
    rotate_logs(args.log_dir, args.org, args.retention_days)
    # Initialize logging
    log_path = init_logger(args.log_dir, args.org)
    log(f"Log file: {log_path}")
    log(f"Retention: keep last {args.retention_days} day(s)")
    target_dir = args.dir or os.path.abspath(os.path.join(".", args.org))
    os.makedirs(target_dir, exist_ok=True)
    log(f"Organization: {args.org}")
    log(f"Mode: {args.mode}")
    if args.mode == "ssh":
        log(f"SSH key: {ssh_key or '(default agent/config)'}")
    log(f"Target dir: {target_dir}")
    # NOTE(review): in https mode the token is only used for the API calls,
    # not for `git clone` — private repos need a credential helper. Confirm
    # whether https backups of private repos are expected to work.
    for repo in list_org_repos(args.org, args.token, args.type, args.include_forks, args.include_archived):
        try:
            clone_or_update_repo(repo, target_dir, args.mode, ssh_key, args.update_submodules)
        except Exception as e:
            # Log and continue: one broken repo must not stop the backup.
            log(f"[error] {repo.get('name')}: {e}", stream=sys.stderr)


if __name__ == "__main__":
    main()