Skip to content
This repository was archived by the owner on May 23, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions github_stats/github_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,31 @@ def load_repo_stats(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
f"Loaded repo stats in {self.stats['repo_stats']['collection_time']} seconds"
)

def return_releases(self, base_date=None, window=DEFAULT_WINDOW):
    """
    Get details about releases and return them without registering stats

    The Github query itself is not restricted by time: every release the
    API returns is inspected, and only those whose ``created_at`` falls
    inside ``[base_date - window days, base_date]`` are kept.

    :param base_date: Newest creation time to accept. Defaults to the
        time of the call.
    :type base_date: datetime.datetime
    :param window: Number of days before ``base_date`` to include
    :type window: int
    :returns: List of Github Release dictionaries
    :rtype: list
    """
    if base_date is None:
        # Resolved on every call. A ``datetime.today()`` default in the
        # signature is evaluated only once, when the function is defined,
        # which silently freezes the window in a long-running process.
        # NOTE(review): created_at is parsed below as a naive UTC value
        # while today() is local time -- confirm which one is intended.
        base_date = datetime.today()
    self.log.info("Loading release details...")
    window_start = base_date - timedelta(days=window)
    url = f"/repos/{self.repo_name}/releases"
    releases = []
    for release in self._github_query(url):
        name = release["name"]
        dt_created = datetime.strptime(release["created_at"], "%Y-%m-%dT%H:%M:%SZ")
        if dt_created > base_date:
            self.log.debug(f"Release {name} was created in the future. Skipping.")
            continue
        # The upper bound was already enforced above, so only the lower
        # bound of the window is left to check.
        if dt_created >= window_start:
            releases.append(release)

    return releases

def load_releases(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
"""
Get details about releases
Expand Down
133 changes: 133 additions & 0 deletions view-release-stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python3

from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import copy
from datetime import datetime
import logging
import os
import time
import subprocess
import statistics

# local imports
from github_stats.github_api import GithubAccess
from github_stats.outputs.influx import InfluxOutput
from github_stats.util import load_config

# Directory containing this script, with symlinks resolved by realpath().
SCRIPTDIR = os.path.dirname(os.path.realpath(__file__))

"""
Utility script to view release statistics (e.g. lead time).
It may become the basis for different stats collection in the future.
Run like: poetry run python view-release-stats.py -c config.yml -w 30
to see stats for releases in the last 30 days.
"""


def cli_opts():
    """
    Build the command line parser and parse the process arguments

    :returns: Parsed options (debug, config, window, timestamp)
    :rtype: argparse.Namespace
    """
    arg_parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description="Collect data about Github Releases",
    )
    # Each entry is (flags, add_argument keyword settings); registration
    # order is kept because it drives the order of the --help output.
    option_specs = (
        (
            ("--debug",),
            {"action": "store_true", "default": False, "help": "Show debug information"},
        ),
        (
            ("-c", "--config"),
            {"default": f"{SCRIPTDIR}/config.yml", "help": "Config file location"},
        ),
        (
            ("-w", "--window"),
            {
                "default": 1,
                "type": int,
                "help": "Number of days worth of data to collect",
            },
        ),
        (
            ("--timestamp",),
            {
                "default": time.time(),
                "type": float,
                "help": "UTC timestamp to start looking at data from",
            },
        ),
    )
    for flags, settings in option_specs:
        arg_parser.add_argument(*flags, **settings)
    return arg_parser.parse_args()


def get_time_delta_in_minutes_from_iso_strings(timeStringA, timeStringB):
    """
    Return how many minutes ``timeStringA`` is after ``timeStringB``

    Both arguments are ISO 8601 strings. A trailing ``Z`` (UTC designator)
    is accepted on either of them. The result is negative when
    ``timeStringA`` is the earlier of the two.

    :param timeStringA: ISO 8601 timestamp to subtract from
    :type timeStringA: str
    :param timeStringB: ISO 8601 timestamp to subtract
    :type timeStringB: str
    :returns: Difference A - B in minutes
    :rtype: float
    :raises ValueError: If either string is not a valid ISO 8601 timestamp
    :raises TypeError: If only one of the two timestamps carries an offset
    """

    def _parse(value):
        # datetime.fromisoformat() only understands the "Z" suffix from
        # Python 3.11 on, so spell it as an explicit UTC offset instead.
        if value.endswith("Z"):
            value = f"{value[:-1]}+00:00"
        return datetime.fromisoformat(value)

    delta = _parse(timeStringA) - _parse(timeStringB)
    return delta.total_seconds() / 60


def main():
    """
    Log commit->release lead times for every repo in the config

    For each configured repo the releases inside the requested window are
    fetched, ordered oldest first, and every release is compared with the
    one before it: ``git log`` on the local clone under ``repos/<name>``
    lists the commit times between the two tags, and the average and
    median time from commit to release publication are logged in hours.
    A summary over all analysed releases is logged per repo.

    :raises subprocess.CalledProcessError: If ``git log`` fails, e.g. when
        a tag is missing from the local clone
    """
    minutes_per_hour = 60
    args = cli_opts()
    logger = logging.getLogger("github-stats")
    logger.setLevel(logging.DEBUG if args.debug else logging.INFO)
    logger.addHandler(logging.StreamHandler())
    config = load_config(args.config)
    # Naive datetime expressed in UTC; identical for every repo, so it is
    # computed once together with the message that describes the window.
    timestamp = datetime.utcfromtimestamp(args.timestamp)
    window_message = f"{args.window} days before {timestamp}"
    for repo in config["repos"]:
        local_config = copy.deepcopy(config)
        local_config.pop("repos", None)
        local_config["repo"] = repo
        gh = GithubAccess(local_config)
        # Releases without a publication date (drafts) cannot have a lead
        # time computed, so they are left out.
        releases = [
            release
            for release in gh.return_releases(timestamp, args.window)
            if release.get("published_at")
        ]
        # Reverse so we have chronological order.
        # TODO(review): return_releases could handle this itself, e.g. via
        # a reverse=True switch.
        releases.reverse()
        if not releases:
            logger.info(f"Found no releases in specified window of {window_message}")
            continue

        all_releases_commit_deltas_in_minutes = []
        previous_release = None
        for release in releases:
            # datetime.fromisoformat hates the Z on older Python versions.
            # TODO(review): look for a parser that accepts it to avoid this
            # brittle string manipulation.
            release_date_in_utc = release["published_at"].rstrip("Z") + "+00:00"
            logger.info(
                f"{release['tag_name']} on {release_date_in_utc} by {release['author']['login']}"
            )

            if previous_release is not None:
                # NOTE(review): "A...B" is git's symmetric difference; for
                # a linear history it matches "B..A" -- confirm intent.
                revision_range = f"{release['tag_name']}...{previous_release['tag_name']}"
                # TODO(review): move this step into github_stats/gitops.py
                # to leverage the existing pygit2 auth, keep the pattern
                # consistent and allow for more consistent error handling.
                commit_times_output = subprocess.check_output(
                    ["git", "log", "--format=%cI", revision_range],
                    cwd=f"{SCRIPTDIR}/repos/{repo['name']}",
                ).decode()
                # Drop the empty strings left by the trailing newline.
                commit_times = [line for line in commit_times_output.split("\n") if line]
                deltas_in_minutes = [
                    get_time_delta_in_minutes_from_iso_strings(release_date_in_utc, commit_time)
                    for commit_time in commit_times
                ]
                if deltas_in_minutes:
                    release_average_delta_in_hours = round(
                        sum(deltas_in_minutes) / minutes_per_hour / len(deltas_in_minutes)
                    )
                    release_median_delta_in_hours = round(
                        statistics.median(deltas_in_minutes) / minutes_per_hour
                    )
                    lead_time_msg = "lead time for commit in release, in hours"
                    logger.info(f"Average {lead_time_msg}: {release_average_delta_in_hours}")
                    logger.info(f"Median {lead_time_msg}: {release_median_delta_in_hours}")
                else:
                    # An empty range would otherwise divide by zero.
                    logger.info(f"No commits found in {revision_range}")
                all_releases_commit_deltas_in_minutes += deltas_in_minutes

            previous_release = release

        logger.info(f"Analyzed {len(releases)} releases found in {window_message}")
        if all_releases_commit_deltas_in_minutes:
            average_in_hours = round(
                sum(all_releases_commit_deltas_in_minutes)
                / minutes_per_hour
                / len(all_releases_commit_deltas_in_minutes)
            )
            median_in_hours = round(
                statistics.median(all_releases_commit_deltas_in_minutes) / minutes_per_hour
            )
            lead_time_msg = "lead time for commit->release, in hours"
            logger.info(f"Average {lead_time_msg}: {average_in_hours}")
            logger.info(f"Median {lead_time_msg}: {median_in_hours}")
        else:
            # A single release in the window has no predecessor to diff
            # against, so there is nothing to average.
            logger.info("No commits between releases found; lead time not available")


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()