mostly-ai
diff --git a/README.md b/README.md
Lines changed: 10 additions & 10 deletions
diff --git a/examples/baseball-players.ipynb b/examples/baseball-players.ipynb
Lines changed: 10 additions & 10 deletions
diff --git a/examples/baseball-seasons.ipynb b/examples/baseball-seasons.ipynb
Lines changed: 10 additions & 10 deletions
diff --git a/examples/benchmark.ipynb b/examples/benchmark.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/examples/quick-start.ipynb b/examples/quick-start.ipynb
Lines changed: 1 addition & 1 deletion
@@ -27,21 +27,21 @@ import json
 from mostlyai import qa
 
 # fetch original + synthetic data
-base_url = 'https://github.com/mostly-ai/mostlyai-qa/raw/refs/heads/main/examples/quick-start'
-syn = pd.read_csv(f'{base_url}/census2k-syn_mostly.csv.gz')
+base_url = "https://github.com/mostly-ai/mostlyai-qa/raw/refs/heads/main/examples/quick-start"
+syn = pd.read_csv(f"{base_url}/census2k-syn_mostly.csv.gz")
 # syn = pd.read_csv(f'{base_url}/census2k-syn_flip30.csv.gz') # a 30% perturbation of trn
-trn = pd.read_csv(f'{base_url}/census2k-trn.csv.gz')
-hol = pd.read_csv(f'{base_url}/census2k-hol.csv.gz')
+trn = pd.read_csv(f"{base_url}/census2k-trn.csv.gz")
+hol = pd.read_csv(f"{base_url}/census2k-hol.csv.gz")
 
 # runs for ~30secs
 report_path, metrics = qa.report(
-    syn_tgt_data = syn,
-    trn_tgt_data = trn,
-    hol_tgt_data = hol,
+    syn_tgt_data=syn,
+    trn_tgt_data=trn,
+    hol_tgt_data=hol,
 )
 
 # pretty print metrics
-print(json.dumps(metrics, indent=4))
+print(metrics.model_dump_json(indent=4))
 
 # open up HTML report in new browser window
 webbrowser.open(f"file://{report_path.absolute()}")
@@ -104,7 +104,7 @@ def report(
     max_sample_size_embeddings: int | None = None,
     statistics_path: str | Path | None = None,
     on_progress: ProgressCallback | None = None,
-) -> tuple[Path, dict | None]:
+) -> tuple[Path, Metrics | None]:
     """
     Generate HTML report and metrics for comparing synthetic and original data samples.
 
@@ -128,7 +128,7 @@ def report(
         on_progress: A custom progress callback
     Returns:
         1. Path to the HTML report
-        2. Dictionary of calculated metrics:
+        2. Pydantic Metrics:
         - `accuracy`:  # Accuracy is defined as (100% - Total Variation Distance), for each distribution, and then averaged across.
           - `overall`: Overall accuracy of synthetic data, i.e. average across univariate, bivariate and coherence.
           - `univariate`: Average accuracy of discretized univariate distributions.
 
@@ -13,15 +13,15 @@
    "execution_count": null,
    "id": "082b3689-2807-420b-8bb7-a9a40cedf3c3",
    "metadata": {},
-   "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import webbrowser\n",
     "from pathlib import Path\n",
     "from mostlyai.qa import report\n",
     "\n",
     "wdir = Path(\"baseball-players\")"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -36,7 +36,6 @@
    "execution_count": null,
    "id": "95f6914a-e6cf-4e1f-8183-7f482228317f",
    "metadata": {},
-   "outputs": [],
    "source": [
     "report_path, metrics = report(\n",
     "    syn_tgt_data=pd.read_parquet(wdir / \"generated-data\"),\n",
@@ -46,17 +45,18 @@
     "    report_path=\"baseball-players.html\",\n",
     ")\n",
     "metrics"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "88046c1f-0343-4e15-a1d0-ab2191417492",
    "metadata": {},
-   "outputs": [],
    "source": [
     "webbrowser.open(f\"file://{report_path.absolute()}\")"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -71,7 +71,6 @@
    "execution_count": null,
    "id": "b45c06d4-1a7e-4bf2-aa83-f3f6e411caa9",
    "metadata": {},
-   "outputs": [],
    "source": [
     "report_path, metrics = report(\n",
     "    syn_tgt_data=pd.read_parquet(wdir / \"generated-data\"),\n",
@@ -86,17 +85,18 @@
     "    report_path=\"baseball-players-with-context.html\",\n",
     ")\n",
     "metrics"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "00aa1fe7-6a9a-40c6-94d0-8b03632f1fa8",
    "metadata": {},
-   "outputs": [],
    "source": [
     "webbrowser.open(f\"file://{report_path.absolute()}\")"
-   ]
+   ],
+   "outputs": []
   }
  ],
  "metadata": {
 
@@ -13,15 +13,15 @@
    "execution_count": null,
    "id": "082b3689-2807-420b-8bb7-a9a40cedf3c3",
    "metadata": {},
-   "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import webbrowser\n",
     "from pathlib import Path\n",
     "from mostlyai.qa import report\n",
     "\n",
     "wdir = Path(\"baseball-seasons\")"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -36,7 +36,6 @@
    "execution_count": null,
    "id": "95f6914a-e6cf-4e1f-8183-7f482228317f",
    "metadata": {},
-   "outputs": [],
    "source": [
     "report_path, metrics = report(\n",
     "    syn_tgt_data=pd.read_parquet(wdir / \"generated-data\"),\n",
@@ -47,17 +46,18 @@
     "    report_path=\"baseball-seasons.html\",\n",
     ")\n",
     "metrics"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "88046c1f-0343-4e15-a1d0-ab2191417492",
    "metadata": {},
-   "outputs": [],
    "source": [
     "webbrowser.open(f\"file://{report_path.absolute()}\")"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -72,7 +72,6 @@
    "execution_count": null,
    "id": "b45c06d4-1a7e-4bf2-aa83-f3f6e411caa9",
    "metadata": {},
-   "outputs": [],
    "source": [
     "report_path, metrics = report(\n",
     "    syn_tgt_data=pd.read_parquet(wdir / \"generated-data\"),\n",
@@ -87,17 +86,18 @@
     "    report_path=\"baseball-seasons-with-context.html\",\n",
     ")\n",
     "metrics"
-   ]
+   ],
+   "outputs": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "00aa1fe7-6a9a-40c6-94d0-8b03632f1fa8",
    "metadata": {},
-   "outputs": [],
    "source": [
     "webbrowser.open(f\"file://{report_path.absolute()}\")"
-   ]
+   ],
+   "outputs": []
   }
  ],
  "metadata": {
 
@@ -59,7 +59,7 @@
     "                    trn_tgt_data=tgt,\n",
     "                    hol_tgt_data=hol,\n",
     "                )\n",
-    "                row = pd.json_normalize(metrics, sep=\"_\")\n",
+    "                row = pd.json_normalize(metrics.model_dump(), sep=\"_\")\n",
     "                row.insert(0, \"dataset\", dataset)\n",
     "                row.insert(1, \"synthesizer\", synthesizer)\n",
     "                rows += [row]\n",
@@ -665,7 +665,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.0"
+   "version": "3.11.7"
   }
  },
  "nbformat": 4,
 
@@ -35,7 +35,7 @@
     ")\n",
     "\n",
     "# pretty print metrics\n",
-    "print(json.dumps(metrics, indent=4))\n",
+    "print(metrics.model_dump_json(indent=4))\n",
     "\n",
     "# open up HTML report in new browser window\n",
     "webbrowser.open(f\"file://{report_path.absolute()}\")"