@@ -27,21 +27,21 @@ import json
2727from mostlyai import qa
2828
2929# fetch original + synthetic data
30- base_url = 'https://github.com/mostly-ai/mostlyai-qa/raw/refs/heads/main/examples/quick-start'
31- syn = pd.read_csv(f'{base_url}/census2k-syn_mostly.csv.gz')
30+ base_url = "https://github.com/mostly-ai/mostlyai-qa/raw/refs/heads/main/examples/quick-start"
31+ syn = pd.read_csv(f"{base_url}/census2k-syn_mostly.csv.gz")
3232# syn = pd.read_csv(f'{base_url}/census2k-syn_flip30.csv.gz') # a 30% perturbation of trn
33- trn = pd.read_csv(f'{base_url}/census2k-trn.csv.gz')
34- hol = pd.read_csv(f'{base_url}/census2k-hol.csv.gz')
33+ trn = pd.read_csv(f"{base_url}/census2k-trn.csv.gz")
34+ hol = pd.read_csv(f"{base_url}/census2k-hol.csv.gz")
3535
3636# runs for ~30secs
3737report_path, metrics = qa.report(
38- syn_tgt_data = syn,
39- trn_tgt_data = trn,
40- hol_tgt_data = hol,
38+ syn_tgt_data = syn,
39+ trn_tgt_data = trn,
40+ hol_tgt_data = hol,
4141)
4242
4343# pretty print metrics
44- print(json.dumps(metrics, indent=4))
44+ print(metrics.model_dump_json(indent=4))
4545
4646# open up HTML report in new browser window
4747webbrowser.open(f"file://{report_path.absolute()}")
@@ -104,7 +104,7 @@ def report(
104104 max_sample_size_embeddings: int | None = None,
105105 statistics_path: str | Path | None = None,
106106 on_progress: ProgressCallback | None = None,
107- ) -> tuple[Path, dict | None]:
107+ ) -> tuple[Path, Metrics | None]:
108108 """
109109 Generate HTML report and metrics for comparing synthetic and original data samples.
110110
@@ -128,7 +128,7 @@ def report(
128128 on_progress: A custom progress callback
129129 Returns:
130130 1. Path to the HTML report
131- 2. Dictionary of calculated metrics:
131+ 2. Pydantic Metrics:
132132 - `accuracy`: # Accuracy is defined as (100% - Total Variation Distance), for each distribution, and then averaged across.
133133 - `overall`: Overall accuracy of synthetic data, i.e. average across univariate, bivariate and coherence.
134134 - `univariate`: Average accuracy of discretized univariate distributions.
0 commit comments