Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .github/workflows/update-azure-coverage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
---
name: Update Azure Coverage Data

on:
  schedule:
    # 05:00 UTC every Monday. Quoted so the "*" characters are
    # unambiguously a plain string, never anchor/alias sigils.
    - cron: "0 5 * * MON"
  workflow_dispatch:
    inputs:
      targetBranch:
        required: true
        type: string
        description: "Branch to checkout and compare against (e.g. harshmishra/doc-91)"
  pull_request:
    types: [opened, synchronize]

jobs:
  update-azure-coverage:
    name: Update Azure coverage data
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout docs
        uses: actions/checkout@v4
        with:
          # Full history so the "git diff origin/<branch>" below can
          # resolve remote refs.
          fetch-depth: 0
          path: docs
          # Empty for schedule/pull_request events; checkout then falls
          # back to the ref that triggered the workflow.
          ref: ${{ github.event.inputs.targetBranch }}

      - name: Set up system wide dependencies
        run: |
          # "apt-get update" first so the package index exists, and "-y"
          # because apt-get aborts at its confirmation prompt when stdin
          # is not a TTY (as on CI runners).
          sudo apt-get update
          sudo apt-get install -y jq wget

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          # Keep versions quoted: e.g. an unquoted 3.10 parses as float 3.1.
          python-version: "3.11"

      - name: Download Azure implementation metrics artifact
        working-directory: docs
        run: bash ./scripts/get_latest_github_metrics.sh ./target main
        env:
          GITHUB_TOKEN: ${{ secrets.PRO_ACCESS_TOKEN }}
          REPOSITORY_NAME: localstack-pro
          ARTIFACT_ID: implemented_features_python-amd64.csv
          WORKFLOW: "Az / Build, Test, Push"

      - name: Generate Azure coverage JSON data
        working-directory: docs
        run: |
          python3 scripts/create_azure_coverage.py -i target/implemented_features_python-amd64.csv/implemented_features.csv -o target/updated_azure_coverage
          mv -f target/updated_azure_coverage/*.json src/data/azure-coverage/

      - name: Check for changes
        id: check-for-changes
        working-directory: docs
        env:
          TARGET_BRANCH: ${{ github.event.inputs.targetBranch }}
        run: |
          mkdir -p resources
          # Prefer the automation branch if it exists, otherwise diff
          # against the manually supplied target branch.
          (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log
          echo "diff-count=$(cat resources/diff-check.log | wc -l)" >> "$GITHUB_OUTPUT"
          cat resources/diff-check.log

      - name: Create PR
        uses: peter-evans/create-pull-request@v7
        # Only open a PR when the diff step actually reported changed files.
        if: ${{ success() && steps.check-for-changes.outputs.diff-count != '0' && steps.check-for-changes.outputs.diff-count != '' }}
        with:
          path: docs
          title: "Update Azure coverage data"
          body: "Update generated Azure coverage JSON data from the latest LocalStack Pro parity metrics artifact."
          branch: "automated-azure-coverage-updates"
          author: "LocalStack Bot <localstack-bot@users.noreply.github.com>"
          committer: "LocalStack Bot <localstack-bot@users.noreply.github.com>"
          commit-message: "update generated azure coverage data"
          token: ${{ secrets.PRO_ACCESS_TOKEN }}
139 changes: 139 additions & 0 deletions scripts/create_azure_coverage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
Generate Azure coverage JSON files from implementation CSV data.
"""

import argparse
import csv
import json
from pathlib import Path
from typing import Any


def _as_bool(value: Any, default: bool = True) -> bool:
if value is None:
return default
if isinstance(value, bool):
return value
return str(value).strip().lower() in {"1", "true", "yes", "y"}


def _group_name(service_name: str, category: str) -> str:
service_name = (service_name or "").strip()
category = (category or "").strip()
if not category:
return service_name
if category.lower() in {"none", "null", "n/a"}:
return service_name
if category == service_name:
return service_name
return f"{service_name} ({category})"


def _normalize_provider(value: str) -> str:
return (value or "").strip().replace("_", ".")


def _resolve_input_csv(path: Path) -> Path:
if path.exists():
if path.is_file():
return path
# Support passing a directory that contains the extracted artifact.
nested_csv = path / "implemented_features.csv"
if nested_csv.exists():
return nested_csv
matches = sorted(path.rglob("implemented_features.csv"))
if matches:
return matches[0]
raise FileNotFoundError(f"No implemented_features.csv found under: {path}")

# Backward-compatible fallback for target/implemented_features.csv.
if path.name == "implemented_features.csv" and path.parent.exists():
matches = sorted(path.parent.rglob("implemented_features.csv"))
if matches:
return matches[0]

raise FileNotFoundError(f"Input CSV not found: {path}")


def _load_csv(path: Path) -> dict[str, dict[str, dict[str, dict[str, Any]]]]:
    """Parse the implementation CSV into nested coverage data.

    Returns ``provider -> group -> feature -> {"implemented": bool, "pro": bool}``.
    Rows without a resource provider or feature name are skipped; both
    flags default to True when their columns are absent.

    Raises ValueError when the CSV has no header row.
    """
    csv_path = _resolve_input_csv(path)

    result: dict[str, dict[str, dict[str, dict[str, Any]]]] = {}
    with csv_path.open(mode="r", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        if not reader.fieldnames:
            raise ValueError("Input CSV has no headers.")

        for record in reader:
            provider = _normalize_provider(record.get("resource_provider", ""))
            if not provider:
                continue

            # Feature column has two historical spellings.
            feature = (record.get("feature") or record.get("operation") or "").strip()
            if not feature:
                continue

            group = _group_name(record.get("service", ""), record.get("category", "")) or "General"

            # Flag columns also have several historical spellings; fall
            # through them in order and default to True when all absent.
            implemented_flag = _as_bool(
                record.get("implemented", record.get("is_implemented", record.get("isImplemented"))),
                default=True,
            )
            pro_flag = _as_bool(
                record.get("pro", record.get("is_pro", record.get("isPro"))),
                default=True,
            )

            result.setdefault(provider, {}).setdefault(group, {})[feature] = {
                "implemented": implemented_flag,
                "pro": pro_flag,
            }

    return result


def _sorted_details(details: dict[str, dict[str, dict[str, Any]]]) -> dict[str, dict[str, dict[str, Any]]]:
sorted_details: dict[str, dict[str, dict[str, Any]]] = {}
for group_name in sorted(details.keys()):
operations = details[group_name]
sorted_details[group_name] = dict(sorted(operations.items(), key=lambda item: item[0]))
return sorted_details


def write_coverage_files(coverage: dict[str, dict[str, dict[str, dict[str, Any]]]], output_dir: Path) -> None:
    """Write one ``<provider>.json`` per provider into ``output_dir``.

    The directory is created if needed. Each file holds the provider
    name, an (empty) operations list, and the sorted details mapping,
    pretty-printed with a trailing newline.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for provider in sorted(coverage):
        document = {
            "service": provider,
            "operations": [],
            "details": _sorted_details(coverage[provider]),
        }
        destination = output_dir / f"{provider}.json"
        with destination.open(mode="w", encoding="utf-8") as out:
            json.dump(document, out, indent=2)
            out.write("\n")


def main() -> None:
    """CLI entry point: read the implementation CSV and emit per-provider
    JSON coverage files."""
    parser = argparse.ArgumentParser(description="Generate Azure coverage JSON data.")
    parser.add_argument(
        "-i",
        "--implementation-details",
        required=True,
        help="Path to implementation details CSV.",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        required=True,
        help="Directory where generated JSON files will be written.",
    )
    options = parser.parse_args()

    write_coverage_files(
        _load_csv(Path(options.implementation_details)),
        Path(options.output_dir),
    )


if __name__ == "__main__":
    main()
65 changes: 65 additions & 0 deletions scripts/get_latest_github_metrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash
# Download the latest implementation-metrics artifact from a GitHub Actions
# workflow run and place it under the target folder.
#
# Usage: get_latest_github_metrics.sh [parent_folder] [branch]
# Env:   REPOSITORY_OWNER, REPOSITORY_NAME, ARTIFACT_ID, WORKFLOW,
#        PREFIX_ARTIFACT, FILTER_SUCCESS, LIMIT, RESOURCE_FOLDER
#        (GITHUB_TOKEN must be set for the gh CLI).
set -euo pipefail

# input params
PARENT_FOLDER=${1:-target}
METRICS_ARTIFACTS_BRANCH=${2:-main}

# env vars
REPOSITORY_NAME=${REPOSITORY_NAME:-localstack-pro}
ARTIFACT_ID=${ARTIFACT_ID:-implemented_features_python-amd64.csv}
WORKFLOW=${WORKFLOW:-"Az / Build, Test, Push"}
PREFIX_ARTIFACT=${PREFIX_ARTIFACT:-}
FILTER_SUCCESS=${FILTER_SUCCESS:-1}
LIMIT=${LIMIT:-20}

RESOURCE_FOLDER=${RESOURCE_FOLDER:-}
REPOSITORY_OWNER=${REPOSITORY_OWNER:-localstack}
TARGET_FOLDER="$PARENT_FOLDER/$RESOURCE_FOLDER"

TMP_FOLDER="$PARENT_FOLDER/tmp_download"
mkdir -p "$TMP_FOLDER"

echo "Searching for artifact '$ARTIFACT_ID' in workflow '$WORKFLOW' on branch '$METRICS_ARTIFACTS_BRANCH' in repo '$REPOSITORY_OWNER/$REPOSITORY_NAME'."

if [ "$FILTER_SUCCESS" = "1" ]; then
  echo "Filtering runs by conclusion=success"
  SELECTOR='.[] | select(.conclusion=="success")'
else
  echo "Filtering runs by completed status (success/failure)"
  SELECTOR='.[] | select(.status=="completed" and (.conclusion=="failure" or .conclusion=="success"))'
fi

# One JSON object per matching run, newest first; jq -rs below slurps them
# back into an array for positional access.
RUN_IDS=$(gh run list --limit "$LIMIT" --branch "$METRICS_ARTIFACTS_BRANCH" --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" --workflow "$WORKFLOW" --json databaseId,conclusion,status --jq "$SELECTOR")

if [ "$(echo "$RUN_IDS" | jq -rs '.[0].databaseId')" = "null" ]; then
  echo "No matching workflow run found."
  exit 1
fi

DOWNLOADED=0
for ((i=0; i<LIMIT; i++)); do
  RUN_ID=$(echo "$RUN_IDS" | jq -rs ".[$i].databaseId")

  # Stop once the candidate list is exhausted instead of issuing
  # `gh run download null` for the remaining iterations.
  if [ "$RUN_ID" = "null" ]; then
    break
  fi
  echo "Trying run id: $RUN_ID"

  # A run may legitimately lack the artifact (expired or never uploaded);
  # ignore the failure and try the next run.
  gh run download "$RUN_ID" --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" -p "$ARTIFACT_ID" -D "$TMP_FOLDER" || true

  if [ -n "$(find "$TMP_FOLDER" -mindepth 1 -print -quit 2>/dev/null)" ]; then
    echo "Downloaded artifact successfully."
    DOWNLOADED=1
    break
  fi
done

# Fail loudly when every candidate run was tried without a download; the
# previous behavior silently continued with an empty folder and "succeeded".
if [ "$DOWNLOADED" != "1" ]; then
  echo "No artifact '$ARTIFACT_ID' could be downloaded from the last $LIMIT matching runs."
  rm -rf "$TMP_FOLDER"
  exit 1
fi

echo "Moving artifact to $TARGET_FOLDER"
mkdir -p "$TARGET_FOLDER"
if [[ -z "${PREFIX_ARTIFACT}" ]]; then
  cp -R "$TMP_FOLDER"/. "$TARGET_FOLDER"/
else
  # Prefix every downloaded CSV so multiple artifacts can share the folder.
  while IFS= read -r file; do
    org_file_name=$(basename "$file")
    mv -- "$file" "$TARGET_FOLDER/$PREFIX_ARTIFACT-$org_file_name"
  done < <(find "$TMP_FOLDER" -type f -name "*.csv")
fi

rm -rf "$TMP_FOLDER"
echo "Contents of $TARGET_FOLDER:"
ls -la "$TARGET_FOLDER"
Loading