Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 59 additions & 14 deletions garak/analyze/report_digest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import html
import importlib
import json
import logging
import markdown
import os
import pprint
Expand Down Expand Up @@ -128,7 +129,17 @@ def _init_populate_result_db(evals, taxonomy=None):
groups = []
if taxonomy is not None:
# get the probe tags
tags = garak._plugins.PluginCache.plugin_info(f"probes.{pm}.{pc}")["tags"]
try:
tags = garak._plugins.PluginCache.plugin_info(f"probes.{pm}.{pc}")[
"tags"
]
except (KeyError, TypeError, ValueError):
logging.warning(
"Plugin cache miss for probe probes.%s.%s; skipping taxonomy grouping",
pm,
pc,
)
tags = []
for tag in tags:
if tag.split(":")[0] == taxonomy:
groups.append(":".join(tag.split(":")[1:]))
Expand All @@ -140,7 +151,18 @@ def _init_populate_result_db(evals, taxonomy=None):
for group in groups:
cursor.execute(
"insert into results values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(pm, group, pc, detector, score, instances, passes, confidence, ci_lower, ci_upper),
(
pm,
group,
pc,
detector,
score,
instances,
passes,
confidence,
ci_lower,
ci_upper,
),
)

return conn, cursor
Expand Down Expand Up @@ -252,9 +274,18 @@ def _get_probe_result_summaries(cursor, probe_group) -> List[tuple]:

def _get_probe_info(probe_module, probe_class, absolute_score) -> dict:
probe_classpath = f"probes.{probe_module}.{probe_class}"
probe_plugin_info = garak._plugins.PluginCache.plugin_info(probe_classpath)
probe_description = probe_plugin_info["description"]
probe_tags = probe_plugin_info["tags"]
try:
probe_plugin_info = garak._plugins.PluginCache.plugin_info(probe_classpath)
probe_description = probe_plugin_info["description"]
probe_tags = probe_plugin_info["tags"]
probe_tier = probe_plugin_info["tier"]
except (KeyError, TypeError, ValueError):
logging.warning(
"Plugin cache miss for probe %s; using placeholders", probe_classpath
)
probe_description = probe_classpath
probe_tags = []
probe_tier = None
probe_plugin_name = f"{probe_module}.{probe_class}"
return {
"probe_name": probe_plugin_name,
Expand All @@ -263,7 +294,7 @@ def _get_probe_info(probe_module, probe_class, absolute_score) -> dict:
absolute_score, garak.analyze.ABSOLUTE_DEFCON_BOUNDS
),
"probe_descr": html.escape(probe_description),
"probe_tier": probe_plugin_info["tier"],
"probe_tier": probe_tier,
"probe_tags": probe_tags,
}

Expand All @@ -275,7 +306,7 @@ def _get_detectors_info(cursor, probe_group: str, probe_class: str) -> List[dict
(probe_group, probe_class),
)
rows = res.fetchall()

return [
{
"detector": row[0],
Expand All @@ -302,10 +333,18 @@ def _get_probe_detector_details(
calibration_used = False
detector = re.sub(r"[^0-9A-Za-z_.]", "", detector)
detector_module, detector_class = detector.split(".")
detector_cache_entry = garak._plugins.PluginCache.plugin_info(
f"detectors.{detector_module}.{detector_class}"
)
detector_description = detector_cache_entry["description"]
try:
detector_cache_entry = garak._plugins.PluginCache.plugin_info(
f"detectors.{detector_module}.{detector_class}"
)
detector_description = detector_cache_entry["description"]
except (KeyError, TypeError, ValueError):
logging.warning(
"Plugin cache miss for detector detectors.%s.%s; using placeholders",
detector_module,
detector_class,
)
detector_description = f"{detector_module}.{detector_class}"

zscore = calibration.get_z_score(
probe_module,
Expand Down Expand Up @@ -366,10 +405,16 @@ def _get_probe_detector_details(
result["confidence"] = confidence
result["absolute_confidence_lower"] = 1.0 - ci_upper # Inverted
result["absolute_confidence_upper"] = 1.0 - ci_lower # Inverted

# Suppress zero-width CIs in HTML display (convert to 0-1 scale)
ci_width = abs(result["absolute_confidence_upper"] - result["absolute_confidence_lower"]) * 100
result["show_confidence_interval"] = (ci_width > CI_DISPLAY_MIN_WIDTH)
ci_width = (
abs(
result["absolute_confidence_upper"]
- result["absolute_confidence_lower"]
)
* 100
)
result["show_confidence_interval"] = ci_width > CI_DISPLAY_MIN_WIDTH

return result

Expand Down
Loading