-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathllmfoundry_usage.py
More file actions
103 lines (78 loc) · 2.89 KB
/
llmfoundry_usage.py
File metadata and controls
103 lines (78 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# /// script
# requires-python = ">=3"
# dependencies = [
# "pandas==2.2.3",
# "pyarrow",
# "marimo",
# "requests==2.32.3",
# "openpyxl",
# ]
# ///
import marimo
# marimo version string recorded in this notebook file
# (presumably written by marimo itself when it saved the file — confirm before hand-editing).
__generated_with = "0.9.1"
# Notebook application object; every cell below registers itself through the
# `@app.cell` decorator, and the `__main__` guard at the bottom calls `app.run()`.
app = marimo.App(width="medium")
@app.cell
def __(mo):
    # Intro cell: renders the notebook's purpose and run instructions as Markdown.
    #
    # The rsync example needs an explicit destination (`.`): with a single
    # source argument and no destination, rsync only lists the remote file
    # instead of copying it.
    #
    # The literal's content is kept flush-left so the Markdown text does not
    # depend on any dedenting being applied before rendering.
    mo.md(
        r"""
# LLM Foundry Usage
This app calculates the [LLM Foundry](https://llmfoundry.straive.com/) usage over the last 30 days.
Get `darwinbox.parquet` via:
```bash
rsync -avzP ubuntu@gramener.com:/mnt/gramener/apps/learn.gramener.com.v1/people/darwinbox.parquet .
```
Then run this notebook.
```bash
uvx marimo edit llmfoundry_usage.py
```
It will create an `llmfoundry-metrics.xlsx` with 3 columns:
- `direct_manager_email`: email ID of the manager
- `report_count`: number of direct + indirect reportees
- `llmfoundry_sum`: number of LLM Foundry users
"""
    )
    return
@app.cell
def __():
    # Setup cell: imports the libraries shared by the other cells and downloads
    # the LLM Foundry usage statistics.
    #
    # Exposes to other cells:
    #   mo, pd, requests -- the imported modules
    #   stats            -- the decoded JSON payload of the stats endpoint
    #   users            -- set of e-mail addresses found in stats["users-by-date"]
    import marimo as mo
    import pandas as pd
    import requests

    # A timeout keeps the notebook from hanging forever on a stalled connection,
    # and raise_for_status() surfaces HTTP errors directly instead of letting an
    # error page fail later inside .json(). The temporary is `_`-prefixed so it
    # stays private to this cell.
    _response = requests.get("https://llmfoundry.straive.com/log/stats", timeout=60)
    _response.raise_for_status()
    stats = _response.json()

    # The cell that consumes `users` lower-cases `company_email_id` before
    # matching, so normalise the case here as well; otherwise a mixed-case
    # address in the stats would never match. Non-string values are kept as-is.
    users = {
        email.lower() if isinstance(email, str) else email
        for email in (row["email"] for row in stats["users-by-date"])
    }
    return mo, pd, requests, stats, users
@app.cell
def __(pd):
    # Helper cell: defines the reporting-tree traversal and the per-manager
    # metric calculation used by the export cell.
    from typing import Dict, Set

    def get_all_reports(manager: str, hierarchy: Dict[str, Set[str]]) -> Set[str]:
        """Return every direct and indirect report of ``manager``.

        Args:
            manager: Identifier (e-mail) of the manager to start from.
            hierarchy: Mapping of manager -> set of that manager's direct reports.

        Returns:
            A new set with all people reachable from ``manager`` through
            ``hierarchy``. The manager is never part of the result, even if the
            data lists them as their own manager or contains a reporting cycle.

        The traversal is iterative and tracks visited people, so cyclic data
        terminates instead of recursing without bound, and ``hierarchy`` itself
        is never modified.
        """
        found: Set[str] = set()
        # Copy the direct reports into a work list; the sets stored in
        # `hierarchy` must stay untouched for later calls.
        pending = list(hierarchy.get(manager, ()))
        while pending:
            person = pending.pop()
            if person in found:
                continue  # already expanded (shared subtree or cycle)
            found.add(person)
            pending.extend(hierarchy.get(person, ()))
        # A cycle (or a self-managing entry) can lead back to the manager.
        found.discard(manager)
        return found

    def calculate_metrics(df: pd.DataFrame) -> pd.DataFrame:
        """Calculate report counts and LLM Foundry usage for each manager.

        Args:
            df: Frame with the columns ``direct_manager_email``,
                ``company_email_id`` and ``llmfoundry_used``.

        Returns:
            One row per unique value of ``direct_manager_email`` with the columns
            ``direct_manager_email``, ``report_count`` (number of direct and
            indirect reports) and ``llmfoundry_sum`` (sum of ``llmfoundry_used``
            over the rows of those reports).
        """
        # manager e-mail -> set of direct reports' e-mails
        hierarchy = (
            df.groupby("direct_manager_email")["company_email_id"].apply(set).to_dict()
        )
        results = []
        for manager in df["direct_manager_email"].unique():
            reports = get_all_reports(manager, hierarchy)
            is_report = df["company_email_id"].isin(reports)
            results.append(
                {
                    "direct_manager_email": manager,
                    "report_count": len(reports),
                    "llmfoundry_sum": df.loc[is_report, "llmfoundry_used"].sum(),
                }
            )
        # Naming the columns keeps the schema stable when there are no rows.
        return pd.DataFrame(
            results,
            columns=["direct_manager_email", "report_count", "llmfoundry_sum"],
        )

    return Dict, Set, calculate_metrics, get_all_reports
@app.cell
def __(calculate_metrics, pd, users):
    # Export cell: loads the people data, flags who appears in `users`, and
    # writes the per-manager metrics to an Excel workbook.
    #
    # Temporaries are `_`-prefixed so that `df` remains the only name this
    # cell exposes to the rest of the notebook.
    df = pd.read_parquet("darwinbox.parquet")

    # Lower-case both e-mail columns so that matching ignores letter case.
    for _column in ("direct_manager_email", "company_email_id"):
        df[_column] = df[_column].str.lower()

    # 1 when the employee's e-mail is in `users`, otherwise 0.
    _is_user = df["company_email_id"].isin(users)
    df["llmfoundry_used"] = _is_user.astype(int)

    _metrics = calculate_metrics(df)
    _metrics.to_excel("llmfoundry-metrics.xlsx", index=False)
    return (df,)
# Run the app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()