Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions simulator-ui/src/VerifyPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,42 @@ function VerifyPage(
}, [selectedSession?.gradingRuns]);

/**
 * Calibration runs of the session that should currently be shown.
 *
 * A run is kept when it matches the selected grader (if one is selected) and
 * belongs to the active scenario run. The active scenario run id is the first
 * of the following that resolves:
 *   1. `workspaceRouting.testRunId`, if it is one of `scenarioRunOptions`
 *   2. `selectedScenarioRunId`, if it is one of `scenarioRunOptions`
 *   3. `sessionDetail.meta.scenarioRunId`, if it is one of `scenarioRunOptions`
 *   4. the first entry of `scenarioRunOptions`
 *   5. the first id that `scenarioRunIdFromCalibrationRun` yields for a run
 * When none resolves, runs are not filtered by scenario run at all.
 */
const filteredRuns = useMemo(() => {
  // True only for ids that are actually offered in `scenarioRunOptions`, so a
  // stale route/selection/meta id can never filter every run away.
  const hasOption = (runId: string | null | undefined): runId is string =>
    Boolean(
      runId &&
        scenarioRunOptions.some((entry) => entry.scenarioRunId === runId),
    );

  // `meta` is only read as a key/value bag when it is a non-null object.
  const meta = sessionDetail?.meta && typeof sessionDetail.meta === "object"
    ? sessionDetail.meta as Record<string, unknown>
    : {};
  // Blank / whitespace-only ids are treated as absent; the stored value is
  // kept untrimmed so it is compared verbatim against the option ids.
  const currentScenarioRunId = typeof meta.scenarioRunId === "string" &&
      meta.scenarioRunId.trim().length > 0
    ? meta.scenarioRunId
    : null;

  // Last-resort fallback: first run from which a scenario run id can be read.
  const latestScenarioRunIdFromRuns = sessionRuns
    .map((run) => scenarioRunIdFromCalibrationRun(run))
    .find((runId): runId is string => Boolean(runId));

  // Candidates in priority order; the first one that is a known option wins.
  const preferredScenarioRunIds: Array<string | null | undefined> = [
    workspaceRouting.testRunId,
    selectedScenarioRunId,
    currentScenarioRunId,
  ];
  const activeScenarioRunFilterId = preferredScenarioRunIds.find(hasOption) ??
    scenarioRunOptions[0]?.scenarioRunId ??
    latestScenarioRunIdFromRuns ??
    null;

  return sessionRuns.filter((run) => {
    if (selectedGraderId && run.graderId !== selectedGraderId) return false;
    // No resolvable scenario run: keep every run of the selected grader.
    if (!activeScenarioRunFilterId) return true;
    return scenarioRunIdFromCalibrationRun(run) === activeScenarioRunFilterId;
  });
}, [
  scenarioRunOptions,
  selectedGraderId,
  selectedScenarioRunId,
  sessionDetail?.meta,
  sessionRuns,
  workspaceRouting.testRunId,
]);

const runConsistencySample = useCallback(async (payload: {
workspaceId: string;
Expand Down
2 changes: 1 addition & 1 deletion simulator-ui/src/verify_metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ const flattenRunExamples = (
? turnRecord.messageRefId
: undefined;
const key = messageRefId ? `ref:${messageRefId}` : `turn:${index}`;
const label = `Turn ${index + 1}`;
const label = `Assistant turn ${fallbackIndex + 1}`;
const parsed = extractScoreReasonPass(turnRecord.result);
buckets.push({
key,
Expand Down