From e60027755f7663b3ce15d22570e1b8ffa8af99e8 Mon Sep 17 00:00:00 2001
From: Sara Robinson
Date: Fri, 31 Oct 2025 11:22:16 -0700
Subject: [PATCH] chore: move CandidateResult, Event, Message, SessionInput and sub-fields to types/evals.py

PiperOrigin-RevId: 826562043
---
 .../genai/replays/test_get_evaluation_run.py | 8 +-
 .../replays/test_public_generate_rubrics.py | 2 +-
 tests/unit/vertexai/genai/test_evals.py | 22 +-
 vertexai/_genai/_evals_common.py | 11 +-
 vertexai/_genai/_evals_data_converters.py | 112 ++---
 vertexai/_genai/_evals_metric_handlers.py | 4 +-
 .../_genai/_observability_data_converter.py | 2 +-
 vertexai/_genai/evals.py | 2 +-
 vertexai/_genai/types/__init__.py | 50 +--
 vertexai/_genai/types/common.py | 381 +++---
 vertexai/_genai/types/evals.py | 317 ++++++++++++++-
 11 files changed, 473 insertions(+), 438 deletions(-)

diff --git a/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py
index 3db3bea517..6d07a52178 100644
--- a/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py
@@ -251,10 +251,10 @@ def check_run_5133048044039700480_evaluation_item_results(
     assert universal_metric_result.explanation is None
     # Check the first rubric verdict.
     rubric_verdict_0 = universal_metric_result.rubric_verdicts[0]
-    assert isinstance(rubric_verdict_0, types.RubricVerdict)
-    assert rubric_verdict_0.evaluated_rubric == types.Rubric(
-        content=types.RubricContent(
-            property=types.RubricContentProperty(
+    assert isinstance(rubric_verdict_0, types.evals.RubricVerdict)
+    assert rubric_verdict_0.evaluated_rubric == types.evals.Rubric(
+        content=types.evals.RubricContent(
+            property=types.evals.RubricContentProperty(
                 description="The response is in English."
) ), diff --git a/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py b/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py index c21ca0e312..d3085bec74 100644 --- a/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py +++ b/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py @@ -173,7 +173,7 @@ def test_public_method_generate_rubrics(client): assert "text_quality_rubrics" in first_rubric_group assert isinstance(first_rubric_group["text_quality_rubrics"], list) assert first_rubric_group["text_quality_rubrics"] - assert isinstance(first_rubric_group["text_quality_rubrics"][0], types.Rubric) + assert isinstance(first_rubric_group["text_quality_rubrics"][0], types.evals.Rubric) pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index b6da92e69c..dec2bb447d 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -2314,7 +2314,7 @@ def test_convert_with_intermediate_events_as_event_objects(self): "response": ["Hi"], "intermediate_events": [ [ - vertexai_genai_types.Event( + vertexai_genai_types.evals.Event( event_id="event1", content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] @@ -2577,14 +2577,14 @@ def test_convert_with_conversation_history(self): ) assert len(eval_case.conversation_history) == 2 - assert eval_case.conversation_history[0] == vertexai_genai_types.Message( + assert eval_case.conversation_history[0] == vertexai_genai_types.evals.Message( content=genai_types.Content( parts=[genai_types.Part(text="Hello")], role="user" ), turn_id="0", author="user", ) - assert eval_case.conversation_history[1] == vertexai_genai_types.Message( + assert eval_case.conversation_history[1] == vertexai_genai_types.evals.Message( content=genai_types.Content( parts=[genai_types.Part(text="Hi")], role="system" ), @@ -2786,7 +2786,7 @@ class TestEvent: """Unit tests for the Event class.""" def test_event_creation(self): - event = vertexai_genai_types.Event( + event = vertexai_genai_types.evals.Event( event_id="event1", content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] @@ -2820,7 +2820,7 @@ def test_eval_case_with_agent_eval_fields(self): tool_declarations=[tool], ) intermediate_events = [ - vertexai_genai_types.Event( + vertexai_genai_types.evals.Event( event_id="event1", content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] @@ -2846,7 +2846,7 @@ class TestSessionInput: """Unit tests for the SessionInput class.""" def test_session_input_creation(self): - session_input = vertexai_genai_types.SessionInput( + session_input = vertexai_genai_types.evals.SessionInput( user_id="user1", state={"key": "value"}, ) @@ -3692,7 +3692,7 @@ def test_eval_case_to_agent_data(self): tool_declarations=[tool], ) intermediate_events = [ - vertexai_genai_types.Event( + vertexai_genai_types.evals.Event( event_id="event1", content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] @@ -3722,7 +3722,7 @@ def test_eval_case_to_agent_data(self): def test_eval_case_to_agent_data_events_only(self): intermediate_events = [ - vertexai_genai_types.Event( + vertexai_genai_types.evals.Event( event_id="event1", content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] @@ -3751,7 +3751,7 @@ def test_eval_case_to_agent_data_events_only(self): def test_eval_case_to_agent_data_empty_event_content(self): intermediate_events = [ - 
vertexai_genai_types.Event( + vertexai_genai_types.evals.Event( event_id="event1", content=None, ) @@ -3933,12 +3933,12 @@ def test_build_request_payload_various_field_types(self): ) ], conversation_history=[ - vertexai_genai_types.Message( + vertexai_genai_types.evals.Message( content=genai_types.Content( parts=[genai_types.Part(text="Turn 1 user")], role="user" ) ), - vertexai_genai_types.Message( + vertexai_genai_types.evals.Message( content=genai_types.Content( parts=[genai_types.Part(text="Turn 1 model")], role="model" ) diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index bd43229bd9..1383822d64 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -1271,16 +1271,19 @@ def _execute_agent_run_with_retry( """Executes agent run for a single prompt.""" try: if isinstance(row["session_inputs"], str): - session_inputs = types.SessionInput.model_validate( + session_inputs = types.evals.SessionInput.model_validate( json.loads(row["session_inputs"]) ) elif isinstance(row["session_inputs"], dict): - session_inputs = types.SessionInput.model_validate(row["session_inputs"]) - elif isinstance(row["session_inputs"], types.SessionInput): + session_inputs = types.evals.SessionInput.model_validate( + row["session_inputs"] + ) + elif isinstance(row["session_inputs"], types.evals.SessionInput): session_inputs = row["session_inputs"] else: raise TypeError( - f"Unsupported session_inputs type: {type(row['session_inputs'])}. Expecting string or dict in types.SessionInput format." + f"Unsupported session_inputs type: {type(row['session_inputs'])}. " + "Expecting string or dict in types.evals.SessionInput format." ) user_id = session_inputs.user_id session_state = session_inputs.state diff --git a/vertexai/_genai/_evals_data_converters.py b/vertexai/_genai/_evals_data_converters.py index 337abaaae8..459600caff 100644 --- a/vertexai/_genai/_evals_data_converters.py +++ b/vertexai/_genai/_evals_data_converters.py @@ -60,7 +60,7 @@ class _GeminiEvalDataConverter(_evals_utils.EvalDataConverter): def _parse_request(self, request_data: dict[str, Any]) -> tuple[ genai_types.Content, genai_types.Content, - list[types.Message], + list[types.evals.Message], types.ResponseCandidate, ]: """Parses a request from a Gemini dataset.""" @@ -76,16 +76,16 @@ def _parse_request(self, request_data: dict[str, Any]) -> tuple[ for turn_id, content_dict in enumerate(request_data.get("contents", [])): if not isinstance(content_dict, dict): raise TypeError( - f"Expected a dictionary for content at turn {turn_id}, but got" - f" {type(content_dict).__name__}: {content_dict}" + "Expected a dictionary for content at turn %s, but got %s: %s" + % (turn_id, type(content_dict).__name__, content_dict) ) if "parts" not in content_dict: raise ValueError( - f"Missing 'parts' key in content structure at turn {turn_id}:" - f" {content_dict}" + "Missing 'parts' key in content structure at turn %s: %s" + % (turn_id, content_dict) ) conversation_history.append( - types.Message( + types.evals.Message( turn_id=str(turn_id), content=genai_types.Content.model_validate(content_dict), ) @@ -121,7 +121,7 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: eval_cases = [] for i, item in enumerate(raw_data): - eval_case_id = f"gemini_eval_case_{i}" + eval_case_id = "gemini_eval_case_%s" % i request_data = item.get("request", {}) response_data = item.get("response", {}) @@ -187,11 +187,11 @@ def convert(self, raw_data: list[dict[str, Any]]) -> 
types.EvaluationDataset: for i, item_dict in enumerate(raw_data): if not isinstance(item_dict, dict): raise TypeError( - f"Expected a dictionary for item at index {i}, but got" - f" {type(item_dict).__name__}: {item_dict}" + "Expected a dictionary for item at index %s, but got %s: %s" + % (i, type(item_dict).__name__, item_dict) ) item = copy.deepcopy(item_dict) - eval_case_id = f"eval_case_{i}" + eval_case_id = "eval_case_%s" % i prompt_data = item.pop("prompt", None) if not prompt_data: prompt_data = item.pop("source", None) @@ -205,10 +205,12 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: if not response_data: raise ValueError( - f"Response is required but missing for {eval_case_id}." + "Response is required but missing for %s." % eval_case_id ) if not prompt_data: - raise ValueError(f"Prompt is required but missing for {eval_case_id}.") + raise ValueError( + "Prompt is required but missing for %s." % eval_case_id + ) prompt: genai_types.Content if isinstance(prompt_data, str): @@ -219,16 +221,16 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: prompt = prompt_data else: raise ValueError( - f"Invalid prompt type for case {i}: {type(prompt_data)}" + "Invalid prompt type for case %s: %s" % (i, type(prompt_data)) ) - conversation_history: Optional[list[types.Message]] = None + conversation_history: Optional[list[types.evals.Message]] = None if isinstance(conversation_history_data, list): conversation_history = [] for turn_id, content in enumerate(conversation_history_data): if isinstance(content, genai_types.Content): conversation_history.append( - types.Message( + types.evals.Message( turn_id=str(turn_id), content=content, ) @@ -239,7 +241,7 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: content ) conversation_history.append( - types.Message( + types.evals.Message( turn_id=str(turn_id), content=validated_content, ) @@ -282,7 +284,7 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: responses = [types.ResponseCandidate(response=response_data)] else: raise ValueError( - f"Invalid response type for case {i}: {type(response_data)}" + "Invalid response type for case %s: %s" % (i, type(response_data)) ) reference: Optional[types.ResponseCandidate] = None @@ -322,14 +324,14 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: try: validated_rubrics = [ ( - types.Rubric.model_validate(r) + types.evals.Rubric.model_validate(r) if isinstance(r, dict) else r ) for r in value ] if all( - isinstance(r, types.Rubric) + isinstance(r, types.evals.Rubric) for r in validated_rubrics ): rubric_groups[key] = types.RubricGroup( @@ -337,11 +339,16 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: ) else: logger.warning( - f"Invalid item type in rubric list for group '{key}' in case {i}." 
+ "Invalid item type in rubric list for group '%s' in case %s.", + key, + i, ) except Exception as e: logger.warning( - f"Failed to validate rubrics for group '{key}' in case {i}: {e}" + "Failed to validate rubrics for group '%s' in case %s: %s", + key, + i, + e, ) elif isinstance(value, types.RubricGroup): rubric_groups[key] = value @@ -352,44 +359,56 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: ) except Exception as e: logger.warning( - f"Failed to validate RubricGroup dict for group '{key}' in case {i}: {e}" + "Failed to validate RubricGroup dict for group '%s' in case %s: %s", + key, + i, + e, ) else: logger.warning( - f"Invalid type for rubric group '{key}' in case {i}." - " Expected list of rubrics, dict, or RubricGroup." + "Invalid type for rubric group '%s' in case %s." + " Expected list of rubrics, dict, or RubricGroup.", + key, + i, ) else: logger.warning( - f"Invalid type for rubric_groups in case {i}. Expected dict." + "Invalid type for rubric_groups in case %s. Expected dict.", + i, ) - intermediate_events: Optional[list[types.Event]] = None + intermediate_events: Optional[list[types.evals.Event]] = None if intermediate_events_data: if isinstance(intermediate_events_data, list): intermediate_events = [] for event in intermediate_events_data: if isinstance(event, dict): try: - validated_event = types.Event.model_validate(event) + validated_event = types.evals.Event.model_validate( + event + ) intermediate_events.append(validated_event) except Exception as e: logger.warning( "Failed to validate intermediate event dict for" - f" case {i}: {e}" + " case %s: %s", + i, + e, ) - elif isinstance(event, types.Event): + elif isinstance(event, types.evals.Event): intermediate_events.append(event) else: logger.warning( "Invalid type for intermediate_event in case" - f" {i}. Expected list of dicts or list of" - " types.Event objects." + " %s. Expected list of dicts or list of" + " types.evals.Event objects.", + i, ) else: logger.warning( - f"Invalid type for intermediate_events in case {i}. Expected" - " list of types.Event objects." + "Invalid type for intermediate_events in case %s. Expected" + " list of types.evals.Event objects.", + i, ) eval_case = types.EvalCase( @@ -414,7 +433,7 @@ class _OpenAIDataConverter(_evals_utils.EvalDataConverter): def _parse_messages(self, messages: list[dict[str, Any]]) -> tuple[ Optional[genai_types.Content], - list[types.Message], + list[types.evals.Message], Optional[genai_types.Content], Optional[types.ResponseCandidate], ]: @@ -434,7 +453,7 @@ def _parse_messages(self, messages: list[dict[str, Any]]) -> tuple[ role = msg.get("role", "user") content = msg.get("content", "") conversation_history.append( - types.Message( + types.evals.Message( turn_id=str(turn_id), content=genai_types.Content( parts=[genai_types.Part(text=content)], role=role @@ -460,11 +479,11 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: """Converts a list of OpenAI ChatCompletion data into an EvaluationDataset.""" eval_cases = [] for i, item in enumerate(raw_data): - eval_case_id = f"openai_eval_case_{i}" + eval_case_id = "openai_eval_case_%s" % i if "request" not in item or "response" not in item: logger.warning( - f"Skipping case {i} due to missing 'request' or 'response' key." 
+ "Skipping case %s due to missing 'request' or 'response' key.", i ) continue @@ -610,7 +629,7 @@ def get_dataset_converter( if dataset_schema in _CONVERTER_REGISTRY: return _CONVERTER_REGISTRY[dataset_schema]() # type: ignore[abstract] else: - raise ValueError(f"Unsupported dataset schema: {dataset_schema}") + raise ValueError("Unsupported dataset schema: %s" % dataset_schema) def _get_first_part_text(content: genai_types.Content) -> str: @@ -695,7 +714,7 @@ def merge_response_datasets_into_canonical_format( """ if not isinstance(raw_datasets, list): raise TypeError( - f"Input 'raw_datasets' must be a list, got {type(raw_datasets)}." + "Input 'raw_datasets' must be a list, got %s." % type(raw_datasets) ) if not raw_datasets or not all(isinstance(ds, list) for ds in raw_datasets): raise ValueError( @@ -704,7 +723,7 @@ def merge_response_datasets_into_canonical_format( if not schemas or len(schemas) != len(raw_datasets): raise ValueError( "A list of schemas must be provided, one for each raw dataset. " - f"Got {len(schemas)} schemas for {len(raw_datasets)} datasets." + "Got %s schemas for %s datasets." % (len(schemas), len(raw_datasets)) ) num_expected_cases = len(raw_datasets[0]) @@ -719,8 +738,8 @@ def merge_response_datasets_into_canonical_format( if len(raw_ds_entry) != num_expected_cases: raise ValueError( "All datasets must have the same number of evaluation cases. " - f"Base dataset (0) has {num_expected_cases}, but dataset {i} " - f"(schema: {schema}) has {len(raw_ds_entry)}." + "Base dataset (0) has %s, but dataset %s (schema: %s) has %s." + % (num_expected_cases, i, schema, len(raw_ds_entry)) ) converter = get_dataset_converter(schema) parsed_evaluation_datasets.append(converter.convert(raw_ds_entry)) @@ -746,7 +765,7 @@ def merge_response_datasets_into_canonical_format( ) candidate_responses.append( _create_placeholder_response_candidate( - f"Missing response from base dataset (0) for case {case_idx}" + "Missing response from base dataset (0) for case %s" % case_idx ) ) @@ -799,13 +818,14 @@ def merge_response_datasets_into_canonical_format( ) candidate_responses.append( _create_placeholder_response_candidate( - f"Missing response from dataset {dataset_idx_offset} " - f"for case {case_idx}" + "Missing response from dataset %s for case %s" + % (dataset_idx_offset, case_idx) ) ) merged_case = types.EvalCase( - eval_case_id=base_eval_case.eval_case_id or f"merged_eval_case_{case_idx}", + eval_case_id=base_eval_case.eval_case_id + or "merged_eval_case_%s" % case_idx, prompt=base_eval_case.prompt, responses=candidate_responses, reference=base_eval_case.reference, diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py index 322d3aff71..9f68bc353d 100644 --- a/vertexai/_genai/_evals_metric_handlers.py +++ b/vertexai/_genai/_evals_metric_handlers.py @@ -480,7 +480,7 @@ def _build_rubric_based_input( ) rubrics_list = [] - parsed_rubrics = [types.Rubric(**r) for r in rubrics_list] + parsed_rubrics = [types.evals.Rubric(**r) for r in rubrics_list] rubric_enhanced_contents = { "prompt": ( [eval_case.prompt.model_dump(mode="json", exclude_none=True)] @@ -535,7 +535,7 @@ def _build_pointwise_input( elif isinstance(value, list) and value: if isinstance(value[0], genai_types.Content): content_list_to_serialize = value - elif isinstance(value[0], types.Message): + elif isinstance(value[0], types.evals.Message): history_texts = [] for msg_obj in value: msg_text = _extract_text_from_content(msg_obj.content) diff --git 
a/vertexai/_genai/_observability_data_converter.py b/vertexai/_genai/_observability_data_converter.py index f7e7f11a08..c52f80b8ea 100644 --- a/vertexai/_genai/_observability_data_converter.py +++ b/vertexai/_genai/_observability_data_converter.py @@ -129,7 +129,7 @@ def _parse_messages( if len(request_msgs) > 1: for i, msg in enumerate(request_msgs[:-1]): conversation_history.append( - types.Message( + types.evals.Message( turn_id=str(i), content=self._message_to_content(msg), author=msg.get("role", ""), diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index bc60977a82..7b7ad2c023 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -361,7 +361,7 @@ def _RubricBasedMetricSpec_to_vertex( setv( to_object, ["inline_rubrics", "rubrics"], - [item for item in getv(from_object, ["inline_rubrics"])], + getv(from_object, ["inline_rubrics"]), ) if getv(from_object, ["rubric_group_key"]) is not None: diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index 735e567dc1..ed2eff77c4 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -146,7 +146,6 @@ from .common import CandidateResponseOrDict from .common import CandidateResult from .common import CandidateResultDict -from .common import CandidateResultOrDict from .common import Chunk from .common import ChunkDict from .common import ChunkOrDict @@ -343,7 +342,6 @@ from .common import EventMetadata from .common import EventMetadataDict from .common import EventMetadataOrDict -from .common import EventOrDict from .common import ExactMatchInput from .common import ExactMatchInputDict from .common import ExactMatchInputOrDict @@ -552,7 +550,6 @@ from .common import MemoryTopicIdOrDict from .common import Message from .common import MessageDict -from .common import MessageOrDict from .common import Metadata from .common import MetadataDict from .common import MetadataOrDict @@ -755,10 +752,8 @@ from .common import RubricBasedMetricSpecOrDict from .common import RubricContent from .common import RubricContentDict -from .common import RubricContentOrDict from .common import RubricContentProperty from .common import RubricContentPropertyDict -from .common import RubricContentPropertyOrDict from .common import RubricContentType from .common import RubricDict from .common import RubricEnhancedContents @@ -773,10 +768,8 @@ from .common import RubricGroup from .common import RubricGroupDict from .common import RubricGroupOrDict -from .common import RubricOrDict from .common import RubricVerdict from .common import RubricVerdictDict -from .common import RubricVerdictOrDict from .common import SamplingConfig from .common import SamplingConfigDict from .common import SamplingConfigOrDict @@ -876,9 +869,6 @@ from .common import SessionEvent from .common import SessionEventDict from .common import SessionEventOrDict -from .common import SessionInput -from .common import SessionInputDict -from .common import SessionInputOrDict from .common import SessionOrDict from .common import State from .common import Strategy @@ -990,21 +980,6 @@ "EvaluationItemRequest", "EvaluationItemRequestDict", "EvaluationItemRequestOrDict", - "RubricContentProperty", - "RubricContentPropertyDict", - "RubricContentPropertyOrDict", - "RubricContent", - "RubricContentDict", - "RubricContentOrDict", - "Rubric", - "RubricDict", - "RubricOrDict", - "RubricVerdict", - "RubricVerdictDict", - "RubricVerdictOrDict", - "CandidateResult", - "CandidateResultDict", - "CandidateResultOrDict", 
"EvaluationItemResult", "EvaluationItemResultDict", "EvaluationItemResultOrDict", @@ -1059,12 +1034,6 @@ "ResponseCandidate", "ResponseCandidateDict", "ResponseCandidateOrDict", - "Event", - "EventDict", - "EventOrDict", - "Message", - "MessageDict", - "MessageOrDict", "EvalCase", "EvalCaseDict", "EvalCaseOrDict", @@ -1770,9 +1739,6 @@ "EvaluationRunInferenceConfig", "EvaluationRunInferenceConfigDict", "EvaluationRunInferenceConfigOrDict", - "SessionInput", - "SessionInputDict", - "SessionInputOrDict", "WinRateStats", "WinRateStatsDict", "WinRateStatsOrDict", @@ -1835,7 +1801,6 @@ "SamplingMethod", "RubricContentType", "EvaluationRunState", - "Importance", "OptimizeTarget", "GenerateMemoriesResponseGeneratedMemoryAction", "PromptOptimizerMethod", @@ -1844,6 +1809,21 @@ "PromptDataOrDict", "LLMMetric", "MetricPromptBuilder", + "RubricContentProperty", + "RubricContentPropertyDict", + "RubricContent", + "RubricContentDict", + "Rubric", + "RubricDict", + "RubricVerdict", + "RubricVerdictDict", + "CandidateResult", + "CandidateResultDict", + "Event", + "EventDict", + "Message", + "MessageDict", + "Importance", "_CreateEvaluationItemParameters", "_CreateEvaluationRunParameters", "_CreateEvaluationSetParameters", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index 3e6622acf4..d193042586 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -318,19 +318,6 @@ class EvaluationRunState(_common.CaseInSensitiveEnum): """Evaluation run is performing rubric generation.""" -class Importance(_common.CaseInSensitiveEnum): - """Importance level of the rubric.""" - - IMPORTANCE_UNSPECIFIED = "IMPORTANCE_UNSPECIFIED" - """Importance is not specified.""" - HIGH = "HIGH" - """High importance.""" - MEDIUM = "MEDIUM" - """Medium importance.""" - LOW = "LOW" - """Low importance.""" - - class OptimizeTarget(_common.CaseInSensitiveEnum): """None""" @@ -537,196 +524,6 @@ class EvaluationItemRequestDict(TypedDict, total=False): EvaluationItemRequestOrDict = Union[EvaluationItemRequest, EvaluationItemRequestDict] -class RubricContentProperty(_common.BaseModel): - """Defines criteria based on a specific property.""" - - description: Optional[str] = Field( - default=None, - description="""Description of the property being evaluated. - Example: "The model's response is grammatically correct." """, - ) - - -class RubricContentPropertyDict(TypedDict, total=False): - """Defines criteria based on a specific property.""" - - description: Optional[str] - """Description of the property being evaluated. - Example: "The model's response is grammatically correct." """ - - -RubricContentPropertyOrDict = Union[RubricContentProperty, RubricContentPropertyDict] - - -class RubricContent(_common.BaseModel): - """Content of the rubric, defining the testable criteria.""" - - property: Optional[RubricContentProperty] = Field( - default=None, - description="""Evaluation criteria based on a specific property.""", - ) - - -class RubricContentDict(TypedDict, total=False): - """Content of the rubric, defining the testable criteria.""" - - property: Optional[RubricContentPropertyDict] - """Evaluation criteria based on a specific property.""" - - -RubricContentOrDict = Union[RubricContent, RubricContentDict] - - -class Rubric(_common.BaseModel): - """Message representing a single testable criterion for evaluation. - - One input prompt could have multiple rubrics. - """ - - rubric_id: Optional[str] = Field( - default=None, - description="""Required. Unique identifier for the rubric. 
- This ID is used to refer to this rubric, e.g., in RubricVerdict.""", - ) - content: Optional[RubricContent] = Field( - default=None, - description="""Required. The actual testable criteria for the rubric.""", - ) - type: Optional[str] = Field( - default=None, - description="""Optional. A type designator for the rubric, which can inform how it's - evaluated or interpreted by systems or users. - It's recommended to use consistent, well-defined, upper snake_case strings. - Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", - "INSTRUCTION_ADHERENCE".""", - ) - importance: Optional[Importance] = Field( - default=None, - description="""Optional. The relative importance of this rubric.""", - ) - - -class RubricDict(TypedDict, total=False): - """Message representing a single testable criterion for evaluation. - - One input prompt could have multiple rubrics. - """ - - rubric_id: Optional[str] - """Required. Unique identifier for the rubric. - This ID is used to refer to this rubric, e.g., in RubricVerdict.""" - - content: Optional[RubricContentDict] - """Required. The actual testable criteria for the rubric.""" - - type: Optional[str] - """Optional. A type designator for the rubric, which can inform how it's - evaluated or interpreted by systems or users. - It's recommended to use consistent, well-defined, upper snake_case strings. - Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", - "INSTRUCTION_ADHERENCE".""" - - importance: Optional[Importance] - """Optional. The relative importance of this rubric.""" - - -RubricOrDict = Union[Rubric, RubricDict] - - -class RubricVerdict(_common.BaseModel): - """Represents the verdict of an evaluation against a single rubric.""" - - evaluated_rubric: Optional[Rubric] = Field( - default=None, - description="""Required. The full rubric definition that was evaluated. - Storing this ensures the verdict is self-contained and understandable, - especially if the original rubric definition changes or was dynamically - generated.""", - ) - verdict: Optional[bool] = Field( - default=None, - description="""Required. Outcome of the evaluation against the rubric, represented as a - boolean. `true` indicates a "Pass", `false` indicates a "Fail".""", - ) - reasoning: Optional[str] = Field( - default=None, - description="""Optional. Human-readable reasoning or explanation for the verdict. - This can include specific examples or details from the evaluated content - that justify the given verdict.""", - ) - - -class RubricVerdictDict(TypedDict, total=False): - """Represents the verdict of an evaluation against a single rubric.""" - - evaluated_rubric: Optional[RubricDict] - """Required. The full rubric definition that was evaluated. - Storing this ensures the verdict is self-contained and understandable, - especially if the original rubric definition changes or was dynamically - generated.""" - - verdict: Optional[bool] - """Required. Outcome of the evaluation against the rubric, represented as a - boolean. `true` indicates a "Pass", `false` indicates a "Fail".""" - - reasoning: Optional[str] - """Optional. Human-readable reasoning or explanation for the verdict. - This can include specific examples or details from the evaluated content - that justify the given verdict.""" - - -RubricVerdictOrDict = Union[RubricVerdict, RubricVerdictDict] - - -class CandidateResult(_common.BaseModel): - """Result for a single candidate.""" - - candidate: Optional[str] = Field( - default=None, - description="""The candidate that is being evaluated. 
The value is the same as the candidate name in the EvaluationRequest.""", - ) - metric: Optional[str] = Field( - default=None, description="""The metric that was evaluated.""" - ) - score: Optional[float] = Field( - default=None, description="""The score of the metric.""" - ) - explanation: Optional[str] = Field( - default=None, description="""The explanation for the metric.""" - ) - rubric_verdicts: Optional[list[RubricVerdict]] = Field( - default=None, description="""The rubric verdicts for the metric.""" - ) - additional_results: Optional[dict[str, Any]] = Field( - default=None, description="""Additional results for the metric.""" - ) - - -class CandidateResultDict(TypedDict, total=False): - """Result for a single candidate.""" - - candidate: Optional[str] - """The candidate that is being evaluated. The value is the same as the candidate name in the EvaluationRequest.""" - - metric: Optional[str] - """The metric that was evaluated.""" - - score: Optional[float] - """The score of the metric.""" - - explanation: Optional[str] - """The explanation for the metric.""" - - rubric_verdicts: Optional[list[RubricVerdictDict]] - """The rubric verdicts for the metric.""" - - additional_results: Optional[dict[str, Any]] - """Additional results for the metric.""" - - -CandidateResultOrDict = Union[CandidateResult, CandidateResultDict] - - class EvaluationItemResult(_common.BaseModel): """Represents the result of an evaluation item.""" @@ -743,7 +540,7 @@ class EvaluationItemResult(_common.BaseModel): metric: Optional[str] = Field( default=None, description="""The metric that was evaluated.""" ) - candidate_results: Optional[list[CandidateResult]] = Field( + candidate_results: Optional[list[evals_types.CandidateResult]] = Field( default=None, description="""TThe results for the metric.""" ) metadata: Optional[dict[str, Any]] = Field( @@ -766,7 +563,7 @@ class EvaluationItemResultDict(TypedDict, total=False): metric: Optional[str] """The metric that was evaluated.""" - candidate_results: Optional[list[CandidateResultDict]] + candidate_results: Optional[list[evals_types.CandidateResult]] """TThe results for the metric.""" metadata: Optional[dict[str, Any]] @@ -1440,89 +1237,6 @@ class ResponseCandidateDict(TypedDict, total=False): ResponseCandidateOrDict = Union[ResponseCandidate, ResponseCandidateDict] -class Event(_common.BaseModel): - """Represents an event in a conversation between agents and users. - - It is used to store the content of the conversation, as well as the actions - taken by the agents like function calls, function responses, intermediate NL - responses etc. - """ - - event_id: Optional[str] = Field( - default=None, description="""Unique identifier for the agent event.""" - ) - content: Optional[genai_types.Content] = Field( - default=None, description="""Content of the event.""" - ) - creation_timestamp: Optional[datetime.datetime] = Field( - default=None, description="""The creation timestamp of the event.""" - ) - author: Optional[str] = Field( - default=None, description="""Name of the entity that produced the event.""" - ) - - -class EventDict(TypedDict, total=False): - """Represents an event in a conversation between agents and users. - - It is used to store the content of the conversation, as well as the actions - taken by the agents like function calls, function responses, intermediate NL - responses etc. 
- """ - - event_id: Optional[str] - """Unique identifier for the agent event.""" - - content: Optional[genai_types.ContentDict] - """Content of the event.""" - - creation_timestamp: Optional[datetime.datetime] - """The creation timestamp of the event.""" - - author: Optional[str] - """Name of the entity that produced the event.""" - - -EventOrDict = Union[Event, EventDict] - - -class Message(_common.BaseModel): - """Represents a single message turn in a conversation.""" - - turn_id: Optional[str] = Field( - default=None, description="""Unique identifier for the message turn.""" - ) - content: Optional[genai_types.Content] = Field( - default=None, description="""Content of the message, including function call.""" - ) - creation_timestamp: Optional[datetime.datetime] = Field( - default=None, - description="""Timestamp indicating when the message was created.""", - ) - author: Optional[str] = Field( - default=None, description="""Name of the entity that produced the message.""" - ) - - -class MessageDict(TypedDict, total=False): - """Represents a single message turn in a conversation.""" - - turn_id: Optional[str] - """Unique identifier for the message turn.""" - - content: Optional[genai_types.ContentDict] - """Content of the message, including function call.""" - - creation_timestamp: Optional[datetime.datetime] - """Timestamp indicating when the message was created.""" - - author: Optional[str] - """Name of the entity that produced the message.""" - - -MessageOrDict = Union[Message, MessageDict] - - class EvalCase(_common.BaseModel): """A comprehensive representation of a GenAI interaction for evaluation.""" @@ -1540,7 +1254,7 @@ class EvalCase(_common.BaseModel): system_instruction: Optional[genai_types.Content] = Field( default=None, description="""System instruction for the model.""" ) - conversation_history: Optional[list[Message]] = Field( + conversation_history: Optional[list[evals_types.Message]] = Field( default=None, description="""List of all prior messages in the conversation (chat history).""", ) @@ -1551,7 +1265,7 @@ class EvalCase(_common.BaseModel): eval_case_id: Optional[str] = Field( default=None, description="""Unique identifier for the evaluation case.""" ) - intermediate_events: Optional[list[Event]] = Field( + intermediate_events: Optional[list[evals_types.Event]] = Field( default=None, description="""This field is experimental and may change in future versions. Intermediate events of a single turn in an agent run or intermediate events of the last turn for multi-turn an agent run.""", ) @@ -1578,7 +1292,7 @@ class EvalCaseDict(TypedDict, total=False): system_instruction: Optional[genai_types.ContentDict] """System instruction for the model.""" - conversation_history: Optional[list[MessageDict]] + conversation_history: Optional[list[evals_types.Message]] """List of all prior messages in the conversation (chat history).""" rubric_groups: Optional[dict[str, "RubricGroupDict"]] @@ -1587,7 +1301,7 @@ class EvalCaseDict(TypedDict, total=False): eval_case_id: Optional[str] """Unique identifier for the evaluation case.""" - intermediate_events: Optional[list[EventDict]] + intermediate_events: Optional[list[evals_types.Event]] """This field is experimental and may change in future versions. 
Intermediate events of a single turn in an agent run or intermediate events of the last turn for multi-turn an agent run.""" agent_info: Optional[evals_types.AgentInfo] @@ -2727,7 +2441,7 @@ class RubricBasedMetricSpec(_common.BaseModel): default=None, description="""Optional configuration for the judge LLM (Autorater).""", ) - inline_rubrics: Optional[list[Rubric]] = Field( + inline_rubrics: Optional[list[evals_types.Rubric]] = Field( default=None, description="""Use rubrics provided directly in the spec.""" ) rubric_group_key: Optional[str] = Field( @@ -2752,7 +2466,7 @@ class RubricBasedMetricSpecDict(TypedDict, total=False): judge_autorater_config: Optional[genai_types.AutoraterConfigDict] """Optional configuration for the judge LLM (Autorater).""" - inline_rubrics: Optional[list[RubricDict]] + inline_rubrics: Optional[list[evals_types.Rubric]] """Use rubrics provided directly in the spec.""" rubric_group_key: Optional[str] @@ -3220,7 +2934,7 @@ class MetricResult(_common.BaseModel): default=None, description="""The score for the metric. Please refer to each metric's documentation for the meaning of the score.""", ) - rubric_verdicts: Optional[list[RubricVerdict]] = Field( + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( default=None, description="""For rubric-based metrics, the verdicts for each rubric.""", ) @@ -3238,7 +2952,7 @@ class MetricResultDict(TypedDict, total=False): score: Optional[float] """The score for the metric. Please refer to each metric's documentation for the meaning of the score.""" - rubric_verdicts: Optional[list[RubricVerdictDict]] + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] """For rubric-based metrics, the verdicts for each rubric.""" explanation: Optional[str] @@ -3257,7 +2971,7 @@ class RubricBasedMetricResult(_common.BaseModel): score: Optional[float] = Field( default=None, description="""Passing rate of all the rubrics.""" ) - rubric_verdicts: Optional[list[RubricVerdict]] = Field( + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( default=None, description="""The details of all the rubrics and their verdicts.""", ) @@ -3269,7 +2983,7 @@ class RubricBasedMetricResultDict(TypedDict, total=False): score: Optional[float] """Passing rate of all the rubrics.""" - rubric_verdicts: Optional[list[RubricVerdictDict]] + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] """The details of all the rubrics and their verdicts.""" @@ -3855,7 +3569,7 @@ class _GenerateInstanceRubricsRequestDict(TypedDict, total=False): class GenerateInstanceRubricsResponse(_common.BaseModel): """Response for generating rubrics.""" - generated_rubrics: Optional[list[Rubric]] = Field( + generated_rubrics: Optional[list[evals_types.Rubric]] = Field( default=None, description="""A list of generated rubrics.""" ) @@ -3863,7 +3577,7 @@ class GenerateInstanceRubricsResponse(_common.BaseModel): class GenerateInstanceRubricsResponseDict(TypedDict, total=False): """Response for generating rubrics.""" - generated_rubrics: Optional[list[RubricDict]] + generated_rubrics: Optional[list[evals_types.Rubric]] """A list of generated rubrics.""" @@ -12611,7 +12325,7 @@ class EvalCaseMetricResult(_common.BaseModel): explanation: Optional[str] = Field( default=None, description="""Explanation of the metric.""" ) - rubric_verdicts: Optional[list[RubricVerdict]] = Field( + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( default=None, description="""The details of all the rubrics and their verdicts for rubric-based metrics.""", 
) @@ -12635,7 +12349,7 @@ class EvalCaseMetricResultDict(TypedDict, total=False): explanation: Optional[str] """Explanation of the metric.""" - rubric_verdicts: Optional[list[RubricVerdictDict]] + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] """The details of all the rubrics and their verdicts for rubric-based metrics.""" raw_output: Optional[list[str]] @@ -12713,34 +12427,6 @@ class EvaluationRunInferenceConfigDict(TypedDict, total=False): ] -class SessionInput(_common.BaseModel): - """This field is experimental and may change in future versions. - - Input to initialize a session and run an agent, used for agent evaluation. - """ - - user_id: Optional[str] = Field(default=None, description="""The user id.""") - state: Optional[dict[str, str]] = Field( - default=None, description="""The state of the session.""" - ) - - -class SessionInputDict(TypedDict, total=False): - """This field is experimental and may change in future versions. - - Input to initialize a session and run an agent, used for agent evaluation. - """ - - user_id: Optional[str] - """The user id.""" - - state: Optional[dict[str, str]] - """The state of the session.""" - - -SessionInputOrDict = Union[SessionInput, SessionInputDict] - - class WinRateStats(_common.BaseModel): """Statistics for win rates for a single metric.""" @@ -12953,7 +12639,7 @@ class RubricGroup(_common.BaseModel): Example: "Instruction Following V1", "Content Quality - Summarization Task".""", ) - rubrics: Optional[list[Rubric]] = Field( + rubrics: Optional[list[evals_types.Rubric]] = Field( default=None, description="""Rubrics that are part of this group.""" ) @@ -12970,7 +12656,7 @@ class RubricGroupDict(TypedDict, total=False): Example: "Instruction Following V1", "Content Quality - Summarization Task".""" - rubrics: Optional[list[RubricDict]] + rubrics: Optional[list[evals_types.Rubric]] """Rubrics that are part of this group.""" @@ -13030,6 +12716,37 @@ def delete( self.api_client.delete(name=self.api_resource.name, force=force, config=config) # type: ignore[union-attr] +RubricContentProperty = evals_types.RubricContentProperty +RubricContentPropertyDict = evals_types.RubricContentPropertyDict +RubricContentPropertyDictOrDict = evals_types.RubricContentPropertyOrDict + +RubricContent = evals_types.RubricContent +RubricContentDict = evals_types.RubricContentDict +RubricContentDictOrDict = evals_types.RubricContentOrDict + +Rubric = evals_types.Rubric +RubricDict = evals_types.RubricDict +RubricDictOrDict = evals_types.RubricOrDict + +RubricVerdict = evals_types.RubricVerdict +RubricVerdictDict = evals_types.RubricVerdictDict +RubricVerdictDictOrDict = evals_types.RubricVerdictOrDict + +CandidateResult = evals_types.CandidateResult +CandidateResultDict = evals_types.CandidateResultDict +CandidateResultDictOrDict = evals_types.CandidateResultOrDict + +Event = evals_types.Event +EventDict = evals_types.EventDict +EventDictOrDict = evals_types.EventOrDict + +Message = evals_types.Message +MessageDict = evals_types.MessageDict +MessageDictOrDict = evals_types.MessageOrDict + +Importance = evals_types.Importance + + class AgentEngineDict(TypedDict, total=False): """An agent engine instance.""" diff --git a/vertexai/_genai/types/evals.py b/vertexai/_genai/types/evals.py index 9ecade7ee3..2c8aba057a 100644 --- a/vertexai/_genai/types/evals.py +++ b/vertexai/_genai/types/evals.py @@ -15,13 +15,27 @@ # Code generated by the Google Gen AI SDK generator DO NOT EDIT. 
-from typing import Optional, Union +import datetime +from typing import Any, Optional, Union from google.genai import _common from google.genai import types as genai_types from pydantic import Field from typing_extensions import TypedDict +class Importance(_common.CaseInSensitiveEnum): + """Importance level of the rubric.""" + + IMPORTANCE_UNSPECIFIED = "IMPORTANCE_UNSPECIFIED" + """Importance is not specified.""" + HIGH = "HIGH" + """High importance.""" + MEDIUM = "MEDIUM" + """Medium importance.""" + LOW = "LOW" + """Low importance.""" + + class AgentInfo(_common.BaseModel): """The agent info of an agent, used for agent eval.""" @@ -67,6 +81,307 @@ class AgentInfoDict(TypedDict, total=False): AgentInfoOrDict = Union[AgentInfo, AgentInfoDict] +class RubricContentProperty(_common.BaseModel): + """Defines criteria based on a specific property.""" + + description: Optional[str] = Field( + default=None, + description="""Description of the property being evaluated. + Example: "The model's response is grammatically correct." """, + ) + + +class RubricContentPropertyDict(TypedDict, total=False): + """Defines criteria based on a specific property.""" + + description: Optional[str] + """Description of the property being evaluated. + Example: "The model's response is grammatically correct." """ + + +RubricContentPropertyOrDict = Union[RubricContentProperty, RubricContentPropertyDict] + + +class RubricContent(_common.BaseModel): + """Content of the rubric, defining the testable criteria.""" + + property: Optional[RubricContentProperty] = Field( + default=None, + description="""Evaluation criteria based on a specific property.""", + ) + + +class RubricContentDict(TypedDict, total=False): + """Content of the rubric, defining the testable criteria.""" + + property: Optional[RubricContentPropertyDict] + """Evaluation criteria based on a specific property.""" + + +RubricContentOrDict = Union[RubricContent, RubricContentDict] + + +class Rubric(_common.BaseModel): + """Message representing a single testable criterion for evaluation. + + One input prompt could have multiple rubrics. + """ + + rubric_id: Optional[str] = Field( + default=None, + description="""Required. Unique identifier for the rubric. + This ID is used to refer to this rubric, e.g., in RubricVerdict.""", + ) + content: Optional[RubricContent] = Field( + default=None, + description="""Required. The actual testable criteria for the rubric.""", + ) + type: Optional[str] = Field( + default=None, + description="""Optional. A type designator for the rubric, which can inform how it's + evaluated or interpreted by systems or users. + It's recommended to use consistent, well-defined, upper snake_case strings. + Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", + "INSTRUCTION_ADHERENCE".""", + ) + importance: Optional[Importance] = Field( + default=None, + description="""Optional. The relative importance of this rubric.""", + ) + + +class RubricDict(TypedDict, total=False): + """Message representing a single testable criterion for evaluation. + + One input prompt could have multiple rubrics. + """ + + rubric_id: Optional[str] + """Required. Unique identifier for the rubric. + This ID is used to refer to this rubric, e.g., in RubricVerdict.""" + + content: Optional[RubricContentDict] + """Required. The actual testable criteria for the rubric.""" + + type: Optional[str] + """Optional. A type designator for the rubric, which can inform how it's + evaluated or interpreted by systems or users. 
+ It's recommended to use consistent, well-defined, upper snake_case strings. + Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", + "INSTRUCTION_ADHERENCE".""" + + importance: Optional[Importance] + """Optional. The relative importance of this rubric.""" + + +RubricOrDict = Union[Rubric, RubricDict] + + +class RubricVerdict(_common.BaseModel): + """Represents the verdict of an evaluation against a single rubric.""" + + evaluated_rubric: Optional[Rubric] = Field( + default=None, + description="""Required. The full rubric definition that was evaluated. + Storing this ensures the verdict is self-contained and understandable, + especially if the original rubric definition changes or was dynamically + generated.""", + ) + verdict: Optional[bool] = Field( + default=None, + description="""Required. Outcome of the evaluation against the rubric, represented as a + boolean. `true` indicates a "Pass", `false` indicates a "Fail".""", + ) + reasoning: Optional[str] = Field( + default=None, + description="""Optional. Human-readable reasoning or explanation for the verdict. + This can include specific examples or details from the evaluated content + that justify the given verdict.""", + ) + + +class RubricVerdictDict(TypedDict, total=False): + """Represents the verdict of an evaluation against a single rubric.""" + + evaluated_rubric: Optional[RubricDict] + """Required. The full rubric definition that was evaluated. + Storing this ensures the verdict is self-contained and understandable, + especially if the original rubric definition changes or was dynamically + generated.""" + + verdict: Optional[bool] + """Required. Outcome of the evaluation against the rubric, represented as a + boolean. `true` indicates a "Pass", `false` indicates a "Fail".""" + + reasoning: Optional[str] + """Optional. Human-readable reasoning or explanation for the verdict. + This can include specific examples or details from the evaluated content + that justify the given verdict.""" + + +RubricVerdictOrDict = Union[RubricVerdict, RubricVerdictDict] + + +class CandidateResult(_common.BaseModel): + """Result for a single candidate.""" + + candidate: Optional[str] = Field( + default=None, + description="""The candidate that is being evaluated. The value is the same as the candidate name in the EvaluationRequest.""", + ) + metric: Optional[str] = Field( + default=None, description="""The metric that was evaluated.""" + ) + score: Optional[float] = Field( + default=None, description="""The score of the metric.""" + ) + explanation: Optional[str] = Field( + default=None, description="""The explanation for the metric.""" + ) + rubric_verdicts: Optional[list[RubricVerdict]] = Field( + default=None, description="""The rubric verdicts for the metric.""" + ) + additional_results: Optional[dict[str, Any]] = Field( + default=None, description="""Additional results for the metric.""" + ) + + +class CandidateResultDict(TypedDict, total=False): + """Result for a single candidate.""" + + candidate: Optional[str] + """The candidate that is being evaluated. 
The value is the same as the candidate name in the EvaluationRequest.""" + + metric: Optional[str] + """The metric that was evaluated.""" + + score: Optional[float] + """The score of the metric.""" + + explanation: Optional[str] + """The explanation for the metric.""" + + rubric_verdicts: Optional[list[RubricVerdictDict]] + """The rubric verdicts for the metric.""" + + additional_results: Optional[dict[str, Any]] + """Additional results for the metric.""" + + +CandidateResultOrDict = Union[CandidateResult, CandidateResultDict] + + +class Event(_common.BaseModel): + """Represents an event in a conversation between agents and users. + + It is used to store the content of the conversation, as well as the actions + taken by the agents like function calls, function responses, intermediate NL + responses etc. + """ + + event_id: Optional[str] = Field( + default=None, description="""Unique identifier for the agent event.""" + ) + content: Optional[genai_types.Content] = Field( + default=None, description="""Content of the event.""" + ) + creation_timestamp: Optional[datetime.datetime] = Field( + default=None, description="""The creation timestamp of the event.""" + ) + author: Optional[str] = Field( + default=None, description="""Name of the entity that produced the event.""" + ) + + +class EventDict(TypedDict, total=False): + """Represents an event in a conversation between agents and users. + + It is used to store the content of the conversation, as well as the actions + taken by the agents like function calls, function responses, intermediate NL + responses etc. + """ + + event_id: Optional[str] + """Unique identifier for the agent event.""" + + content: Optional[genai_types.ContentDict] + """Content of the event.""" + + creation_timestamp: Optional[datetime.datetime] + """The creation timestamp of the event.""" + + author: Optional[str] + """Name of the entity that produced the event.""" + + +EventOrDict = Union[Event, EventDict] + + +class Message(_common.BaseModel): + """Represents a single message turn in a conversation.""" + + turn_id: Optional[str] = Field( + default=None, description="""Unique identifier for the message turn.""" + ) + content: Optional[genai_types.Content] = Field( + default=None, description="""Content of the message, including function call.""" + ) + creation_timestamp: Optional[datetime.datetime] = Field( + default=None, + description="""Timestamp indicating when the message was created.""", + ) + author: Optional[str] = Field( + default=None, description="""Name of the entity that produced the message.""" + ) + + +class MessageDict(TypedDict, total=False): + """Represents a single message turn in a conversation.""" + + turn_id: Optional[str] + """Unique identifier for the message turn.""" + + content: Optional[genai_types.ContentDict] + """Content of the message, including function call.""" + + creation_timestamp: Optional[datetime.datetime] + """Timestamp indicating when the message was created.""" + + author: Optional[str] + """Name of the entity that produced the message.""" + + +MessageOrDict = Union[Message, MessageDict] + + +class SessionInput(_common.BaseModel): + """This field is experimental and may change in future versions. + + Input to initialize a session and run an agent, used for agent evaluation. 
+ """ + + user_id: Optional[str] = Field(default=None, description="""The user id.""") + state: Optional[dict[str, str]] = Field( + default=None, description="""The state of the session.""" + ) + + +class SessionInputDict(TypedDict, total=False): + """This field is experimental and may change in future versions. + + Input to initialize a session and run an agent, used for agent evaluation. + """ + + user_id: Optional[str] + """The user id.""" + + state: Optional[dict[str, str]] + """The state of the session.""" + + +SessionInputOrDict = Union[SessionInput, SessionInputDict] + + class Tools(_common.BaseModel): """Represents a list of tools for an agent."""
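# Illustrative sketch (not part of the patch above): after this refactor the moved
# classes (Rubric, RubricContent, RubricContentProperty, RubricVerdict,
# CandidateResult, Event, Message, SessionInput) live in
# vertexai/_genai/types/evals.py, while vertexai/_genai/types/common.py keeps
# module-level aliases such as `Rubric = evals_types.Rubric` so existing
# `types.Rubric` references continue to resolve. The import path below mirrors
# the updated tests and is an assumption for illustration, not documented public API.
from vertexai._genai import types

# New canonical location, as exercised by the updated replay test:
rubric = types.evals.Rubric(
    content=types.evals.RubricContent(
        property=types.evals.RubricContentProperty(
            description="The response is in English."
        )
    )
)
print(rubric.content.property.description)

# Backward-compatible alias re-exported through types/common.py:
assert types.Rubric is types.evals.Rubric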