From bbbfdbbbbc6cd2c5c7e070916fcc5c9f97c735a0 Mon Sep 17 00:00:00 2001 From: bft-codebot Date: Wed, 25 Feb 2026 00:29:24 +0000 Subject: [PATCH] sync(bfmono): fix(gambit-verify): align verify turn labels and stabilize initial run filtering (+19 more) (bfmono@84952a652) This PR is an automated gambitmono sync of bfmono Gambit packages. - Source: `packages/gambit/` - Core: `packages/gambit/packages/gambit-core/` - bfmono rev: 84952a652 Changes: - 84952a652 fix(gambit-verify): align verify turn labels and stabilize initial run filtering - c56b7f52f feat(gambit): improve verify report controls and harden concurrent calibrate persistence - beb9435c0 feat(gambit-simulator-ui): extend listbox trigger and popover options - 25f9fdcfc fix(gambit-simulator-ui): align verify outlier chip semantics and display - a010b0ee1 feat(gambit-simulator-ui): add verify outliers to workbench chat chips - 383f2500a refactor(simulator-ui): replace nested ternaries in main routing - 13c4c8c22 fix(gambit): preserve shared references in safe session serialization - ae392aa24 feat(gambit-simulator-ui): add grader error chips to workbench chat - 1de6b335c fix(gambit): clamp deck-level maxTurns bounds in test run selection - 01d7abbb9 fix(gambit): default verify tab bootstrap flag to enabled - f3d186c7b fix(gambit): include extension schemas in exports and default serve to restored workspace - a83b7cbe7 fix(gambit): move unbounded build timeout to deck opt-in - acb2de627 fix(gambit): avoid strict json_schema 400s in openrouter responses - 8aba573b6 fix(gambit-simulator-ui): treat errored calibrate runs as failed - ca2028cf8 fix(gambit): prevent circular trace crashes in workspace test run API - 7e41517e5 fix(gambit): make build assistant run timeout unbounded - 24341143d feat(gambit): add deterministic verify fixture seeding - ff2c2d33d feat(gambit): add verify tab consistency UI - 91f0c93bb feat(gambit): add feature-flagged verify routing - 1392f8b65 feat(gambit): support deck-level maxTurns 
override Do not edit this repo directly; make changes in bfmono and re-run the sync. --- simulator-ui/src/VerifyPage.tsx | 36 +++++++++++++++++++++++++++--- simulator-ui/src/verify_metrics.ts | 2 +- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/simulator-ui/src/VerifyPage.tsx b/simulator-ui/src/VerifyPage.tsx index b036eb3b..c7f249bc 100644 --- a/simulator-ui/src/VerifyPage.tsx +++ b/simulator-ui/src/VerifyPage.tsx @@ -374,12 +374,42 @@ function VerifyPage( }, [selectedSession?.gradingRuns]); const filteredRuns = useMemo(() => { + const latestScenarioRunIdFromRuns = sessionRuns + .map((run) => scenarioRunIdFromCalibrationRun(run)) + .find((runId): runId is string => Boolean(runId)); + const hasOption = (runId: string | null | undefined): runId is string => + Boolean( + runId && + scenarioRunOptions.some((entry) => entry.scenarioRunId === runId), + ); + const meta = sessionDetail?.meta && typeof sessionDetail.meta === "object" + ? sessionDetail.meta as Record<string, unknown> + : {}; + const currentScenarioRunId = typeof meta.scenarioRunId === "string" && + meta.scenarioRunId.trim().length > 0 + ? meta.scenarioRunId + : null; + const activeScenarioRunFilterId = hasOption(workspaceRouting.testRunId) + ? workspaceRouting.testRunId + : hasOption(selectedScenarioRunId) + ? selectedScenarioRunId + : hasOption(currentScenarioRunId) + ? currentScenarioRunId + : scenarioRunOptions[0]?.scenarioRunId ?? latestScenarioRunIdFromRuns ?? 
+ null; return sessionRuns.filter((run) => { if (selectedGraderId && run.graderId !== selectedGraderId) return false; - if (!selectedScenarioRunId) return true; - return scenarioRunIdFromCalibrationRun(run) === selectedScenarioRunId; + if (!activeScenarioRunFilterId) return true; + return scenarioRunIdFromCalibrationRun(run) === activeScenarioRunFilterId; }); - }, [selectedGraderId, selectedScenarioRunId, sessionRuns]); + }, [ + scenarioRunOptions, + selectedGraderId, + selectedScenarioRunId, + sessionDetail?.meta, + sessionRuns, + workspaceRouting.testRunId, + ]); const runConsistencySample = useCallback(async (payload: { workspaceId: string; diff --git a/simulator-ui/src/verify_metrics.ts b/simulator-ui/src/verify_metrics.ts index acfdd441..df98edc6 100644 --- a/simulator-ui/src/verify_metrics.ts +++ b/simulator-ui/src/verify_metrics.ts @@ -129,7 +129,7 @@ const flattenRunExamples = ( ? turnRecord.messageRefId : undefined; const key = messageRefId ? `ref:${messageRefId}` : `turn:${index}`; - const label = `Turn ${index + 1}`; + const label = `Assistant turn ${fallbackIndex + 1}`; const parsed = extractScoreReasonPass(turnRecord.result); buckets.push({ key,