From 82f752fcd62f94d5ddeae753225af0465e40fa62 Mon Sep 17 00:00:00 2001 From: secustor Date: Tue, 21 Oct 2025 00:28:28 +0200 Subject: [PATCH 01/15] feat: add support for anthropic api to anthropic on aws bedrock Signed-off-by: secustor --- api/v1alpha1/shared_types.go | 8 +- examples/basic/aws-bedrock-anthropic.yaml | 93 +++ internal/extproc/messages_processor.go | 5 +- .../translator/anthropic_awsanthropic.go | 184 +++++ .../translator/anthropic_awsanthropic_test.go | 650 ++++++++++++++++++ internal/filterapi/filterconfig.go | 3 + ...teway.envoyproxy.io_aiservicebackends.yaml | 1 + ...teway.envoyproxy.io_aiservicebackends.yaml | 1 + site/docs/api/api.mdx | 5 + .../llm-integrations/supported-endpoints.md | 2 + .../aws-bedrock-anthropic.md | 344 +++++++++ .../connect-providers/index.md | 1 + tests/extproc/extproc_test.go | 1 + tests/extproc/real_providers_test.go | 4 + 14 files changed, 1300 insertions(+), 2 deletions(-) create mode 100644 examples/basic/aws-bedrock-anthropic.yaml create mode 100644 internal/extproc/translator/anthropic_awsanthropic.go create mode 100644 internal/extproc/translator/anthropic_awsanthropic_test.go create mode 100644 site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md diff --git a/api/v1alpha1/shared_types.go b/api/v1alpha1/shared_types.go index 596a6a56e2..98e97942d1 100644 --- a/api/v1alpha1/shared_types.go +++ b/api/v1alpha1/shared_types.go @@ -15,7 +15,7 @@ package v1alpha1 type VersionedAPISchema struct { // Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend. // - // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic + // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic Name APISchema `json:"name"` // Version is the version of the API schema. @@ -65,6 +65,12 @@ const ( // APISchemaAnthropic is the native Anthropic API schema. // https://docs.claude.com/en/home APISchemaAnthropic APISchema = "Anthropic" + // APISchemaAWSAnthropic is the schema for Anthropic models hosted on AWS Bedrock. + // Uses the native Anthropic Messages API format for requests and responses. + // + // https://aws.amazon.com/bedrock/anthropic/ + // https://docs.claude.com/en/api/claude-on-amazon-bedrock + APISchemaAWSAnthropic APISchema = "AWSAnthropic" ) const ( diff --git a/examples/basic/aws-bedrock-anthropic.yaml b/examples/basic/aws-bedrock-anthropic.yaml new file mode 100644 index 0000000000..b2db5df483 --- /dev/null +++ b/examples/basic/aws-bedrock-anthropic.yaml @@ -0,0 +1,93 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
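+#
+# Overview (illustrative) of the resources defined below, which expose Claude on AWS Bedrock
+# through the native Anthropic Messages API:
+#   - AIGatewayRoute: routes requests whose x-ai-eg-model header matches the Claude model ID
+#   - AIServiceBackend: declares the AWSAnthropic schema (version bedrock-2023-05-31)
+#   - BackendSecurityPolicy: signs requests with AWS credentials for us-east-1
+#   - Backend + BackendTLSPolicy: target bedrock-runtime.us-east-1.amazonaws.com over TLS
+#   - Secret: holds the AWS credentials file referenced by the policy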
+ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIServiceBackend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + schema: + name: AWSAnthropic + version: bedrock-2023-05-31 + backendRef: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + kind: Backend + group: gateway.envoyproxy.io +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: BackendSecurityPolicy +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials + namespace: default +spec: + targetRefs: + - group: aigateway.envoyproxy.io + kind: AIServiceBackend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + type: AWSCredentials + awsCredentials: + region: us-east-1 + credentialsFile: + secretRef: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: Backend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + endpoints: + - fqdn: + hostname: bedrock-runtime.us-east-1.amazonaws.com + port: 443 +--- +apiVersion: gateway.networking.k8s.io/v1alpha3 +kind: BackendTLSPolicy +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-tls + namespace: default +spec: + targetRefs: + - group: "gateway.envoyproxy.io" + kind: Backend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + validation: + wellKnownCACertificates: "System" + hostname: bedrock-runtime.us-east-1.amazonaws.com +--- +apiVersion: v1 +kind: Secret +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials + namespace: default +type: Opaque +stringData: + # Replace this with your AWS credentials. + # You can also use AWS IAM roles for service accounts (IRSA) in EKS. + credentials: | + [default] + aws_access_key_id = AWS_ACCESS_KEY_ID + aws_secret_access_key = AWS_SECRET_ACCESS_KEY diff --git a/internal/extproc/messages_processor.go b/internal/extproc/messages_processor.go index 9a5ea3eb72..f6b48ec54f 100644 --- a/internal/extproc/messages_processor.go +++ b/internal/extproc/messages_processor.go @@ -157,10 +157,13 @@ func (c *messagesProcessorUpstreamFilter) selectTranslator(out filterapi.Version // Anthropic → GCP Anthropic (request direction translator). // Uses backend config version (GCP Vertex AI requires specific versions like "vertex-2023-10-16"). c.translator = translator.NewAnthropicToGCPAnthropicTranslator(out.Version, c.modelNameOverride) + case filterapi.APISchemaAWSAnthropic: + // Anthropic → AWS Bedrock Anthropic (request direction translator). + c.translator = translator.NewAnthropicToAWSAnthropicTranslator(out.Version, c.modelNameOverride) case filterapi.APISchemaAnthropic: c.translator = translator.NewAnthropicToAnthropicTranslator(out.Version, c.modelNameOverride) default: - return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (GCPAnthropic). Backend %s uses different model format", out.Name) + return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (Anthropic, GCPAnthropic, AWSAnthropic). 
Backend %s uses different model format", out.Name) } return nil } diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go new file mode 100644 index 0000000000..735c9a8119 --- /dev/null +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -0,0 +1,184 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "maps" + "net/url" + + "github.com/anthropics/anthropic-sdk-go" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" + "github.com/envoyproxy/ai-gateway/internal/internalapi" +) + +// NewAnthropicToAWSAnthropicTranslator creates a translator for Anthropic to AWS Bedrock Anthropic format. +// AWS Bedrock supports the native Anthropic Messages API, so this is essentially a passthrough +// translator with AWS-specific path modifications. +func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride internalapi.ModelNameOverride) AnthropicMessagesTranslator { + return &anthropicToAWSAnthropicTranslator{ + apiVersion: apiVersion, + modelNameOverride: modelNameOverride, + } +} + +type anthropicToAWSAnthropicTranslator struct { + // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. + apiVersion string + modelNameOverride internalapi.ModelNameOverride + requestModel internalapi.RequestModel +} + +// RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. +// This handles the transformation from native Anthropic format to AWS Bedrock format. +func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropicschema.MessagesRequest, _ bool) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, +) { + // Extract model name for AWS Bedrock endpoint from the parsed request. + modelName := body.GetModel() + + // Work directly with the map since MessagesRequest is already map[string]interface{}. + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + + // Apply model name override if configured. + a.requestModel = modelName + if a.modelNameOverride != "" { + a.requestModel = a.modelNameOverride + } + + // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). + delete(anthropicReq, "model") + + // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). + // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). + if a.apiVersion == "" { + return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") + } + anthropicReq[anthropicVersionKey] = a.apiVersion + + // Marshal the modified request. + mutatedBody, err := json.Marshal(anthropicReq) + if err != nil { + return nil, nil, fmt.Errorf("failed to marshal modified request: %w", err) + } + + // Determine the AWS Bedrock path based on whether streaming is requested. + var pathTemplate string + if stream, ok := anthropicReq["stream"].(bool); ok && stream { + pathTemplate = "/model/%s/invoke-stream" + } else { + pathTemplate = "/model/%s/invoke" + } + + // URL encode the model ID for the path to handle ARNs with special characters. 
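+ // (Illustrative, mirroring the unit tests added in this patch: url.PathEscape leaves colons intact,
+ // so "anthropic.claude-3-sonnet-20240229-v1:0" yields "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+ // while the "/" in an application-inference-profile ARN is escaped to "%2F".)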
+ // AWS Bedrock model IDs can be simple names (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0") + // or full ARNs which may contain special characters. + encodedModelID := url.PathEscape(a.requestModel) + pathSuffix := fmt.Sprintf(pathTemplate, encodedModelID) + + headerMutation, bodyMutation = buildRequestMutations(pathSuffix, mutatedBody) + return +} + +// ResponseHeaders implements [AnthropicMessagesTranslator.ResponseHeaders] for Anthropic to AWS Bedrock Anthropic. +func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) ( + headerMutation *extprocv3.HeaderMutation, err error, +) { + // For Anthropic to AWS Bedrock Anthropic, no header transformation is needed. + return nil, nil +} + +// ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. +// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, +) { + // Read the response body for both streaming and non-streaming. + bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if bytes.HasPrefix(line, dataPrefix) { + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + } + + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, a.requestModel, nil + } + + // Parse the Anthropic response to extract token usage. + var anthropicResp anthropic.Message + if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + // If we can't parse as Anthropic format, pass through as-is. 
+ return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, a.requestModel, nil + } + + // Extract token usage from the response. + tokenUsage = LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil +} diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go new file mode 100644 index 0000000000..8d9c442f55 --- /dev/null +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -0,0 +1,650 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "bytes" + "encoding/json" + "io" + "testing" + + "github.com/anthropics/anthropic-sdk-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" +) + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *testing.T) { + tests := []struct { + name string + override string + inputModel string + expectedModel string + expectedInPath string + }{ + { + name: "no override uses original model", + override: "", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedInPath: "anthropic.claude-3-haiku-20240307-v1:0", + }, + { + name: "override replaces model in body and path", + override: "anthropic.claude-3-sonnet-20240229-v1:0", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedInPath: "anthropic.claude-3-sonnet-20240229-v1:0", + }, + { + name: "override with empty input model", + override: "anthropic.claude-3-opus-20240229-v1:0", + inputModel: "", + expectedModel: "anthropic.claude-3-opus-20240229-v1:0", + expectedInPath: "anthropic.claude-3-opus-20240229-v1:0", + }, + { + name: "model with ARN format", + override: "", + inputModel: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa", + expectedModel: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa", + expectedInPath: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile%2Faaaaaaaaa", + }, + { + name: "global model ID", + override: "", + inputModel: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedModel: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedInPath: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", tt.override) + + // Create the request using map structure. 
+ originalReq := &anthropicschema.MessagesRequest{ + "model": tt.inputModel, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello"), + }, + }, + }, + } + + headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + // Check path header contains expected model (URL encoded). + pathHeader := headerMutation.SetHeaders[0] + require.Equal(t, ":path", pathHeader.Header.Key) + expectedPath := "/model/" + tt.expectedInPath + "/invoke" + assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) + + // Check that model field is removed from body (since it's in the path). + var modifiedReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) + require.NoError(t, err) + _, hasModel := modifiedReq["model"] + assert.False(t, hasModel, "model field should be removed from request body") + + // Verify anthropic_version field is added (required by AWS Bedrock). + version, hasVersion := modifiedReq["anthropic_version"] + assert.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") + assert.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Create a comprehensive MessagesRequest with all possible fields using map structure. + originalReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-opus-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello, how are you?"), + }, + }, + { + Role: anthropic.MessageParamRoleAssistant, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("I'm doing well, thank you!"), + }, + }, + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Can you help me with the weather?"), + }, + }, + }, + "max_tokens": 1024, + "stream": false, + "temperature": func() *float64 { v := 0.7; return &v }(), + "top_p": func() *float64 { v := 0.95; return &v }(), + "top_k": func() *int { v := 40; return &v }(), + "stop_sequences": []string{"Human:", "Assistant:"}, + "system": "You are a helpful weather assistant.", + "tools": []anthropic.ToolParam{ + { + Name: "get_weather", + Description: anthropic.String("Get current weather information"), + InputSchema: anthropic.ToolInputSchemaParam{ + Type: "object", + Properties: map[string]any{ + "location": map[string]any{ + "type": "string", + "description": "City name", + }, + }, + Required: []string{"location"}, + }, + }, + }, + "tool_choice": anthropic.ToolChoiceUnionParam{ + OfAuto: &anthropic.ToolChoiceAutoParam{}, + }, + } + + headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + var outputReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &outputReq) + require.NoError(t, err) + + require.NotContains(t, outputReq, "model", "model field should be removed for AWS Bedrock") + + // AWS Bedrock requires anthropic_version field. 
+ require.Contains(t, outputReq, "anthropic_version", "anthropic_version should be added for AWS Bedrock") + require.Equal(t, "bedrock-2023-05-31", outputReq["anthropic_version"], "anthropic_version should match the configured version") + + messages, ok := outputReq["messages"].([]any) + require.True(t, ok, "messages should be an array") + require.Len(t, messages, 3, "should have 3 messages") + + require.Equal(t, float64(1024), outputReq["max_tokens"]) + require.Equal(t, false, outputReq["stream"]) + require.Equal(t, 0.7, outputReq["temperature"]) + require.Equal(t, 0.95, outputReq["top_p"]) + require.Equal(t, float64(40), outputReq["top_k"]) + require.Equal(t, "You are a helpful weather assistant.", outputReq["system"]) + + stopSeq, ok := outputReq["stop_sequences"].([]any) + require.True(t, ok, "stop_sequences should be an array") + require.Len(t, stopSeq, 2) + require.Equal(t, "Human:", stopSeq[0]) + require.Equal(t, "Assistant:", stopSeq[1]) + + tools, ok := outputReq["tools"].([]any) + require.True(t, ok, "tools should be an array") + require.Len(t, tools, 1) + + toolChoice, ok := outputReq["tool_choice"].(map[string]any) + require.True(t, ok, "tool_choice should be an object") + require.NotEmpty(t, toolChoice) + + pathHeader := headerMutation.SetHeaders[0] + require.Equal(t, ":path", pathHeader.Header.Key) + expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" + require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) +} + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing.T) { + tests := []struct { + name string + stream any + expectedPathSuffix string + }{ + { + name: "non-streaming uses /invoke", + stream: false, + expectedPathSuffix: "/invoke", + }, + { + name: "streaming uses /invoke-stream", + stream: true, + expectedPathSuffix: "/invoke-stream", + }, + { + name: "missing stream defaults to /invoke", + stream: nil, + expectedPathSuffix: "/invoke", + }, + { + name: "non-boolean stream defaults to /invoke", + stream: "true", + expectedPathSuffix: "/invoke", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + parsedReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + if tt.stream != nil { + if streamVal, ok := tt.stream.(bool); ok { + (*parsedReq)["stream"] = streamVal + } + } + + headerMutation, _, err := translator.RequestBody(nil, parsedReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + + // Check path contains expected suffix. 
+ pathHeader := headerMutation.SetHeaders[0] + expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix + assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + temp := 0.7 + topP := 0.95 + topK := 40 + parsedReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello, world!"), + }, + }, + { + Role: anthropic.MessageParamRoleAssistant, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hi there!"), + }, + }, + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("How are you?"), + }, + }, + }, + "max_tokens": 1000, + "temperature": &temp, + "top_p": &topP, + "top_k": &topK, + "stop_sequences": []string{"Human:", "Assistant:"}, + "stream": false, + "system": "You are a helpful assistant", + "tools": []anthropic.ToolParam{ + { + Name: "get_weather", + Description: anthropic.String("Get weather info"), + InputSchema: anthropic.ToolInputSchemaParam{ + Type: "object", + Properties: map[string]any{ + "location": map[string]any{"type": "string"}, + }, + }, + }, + }, + "tool_choice": map[string]any{"type": "auto"}, + "metadata": map[string]any{"user.id": "test123"}, + } + + _, bodyMutation, err := translator.RequestBody(nil, parsedReq, false) + require.NoError(t, err) + require.NotNil(t, bodyMutation) + + var modifiedReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) + require.NoError(t, err) + + // Messages should be preserved. + require.Len(t, modifiedReq["messages"], 3) + + // Numeric fields get converted to float64 by JSON unmarshalling. + require.Equal(t, float64(1000), modifiedReq["max_tokens"]) + require.Equal(t, 0.7, modifiedReq["temperature"]) + require.Equal(t, 0.95, modifiedReq["top_p"]) + require.Equal(t, float64(40), modifiedReq["top_k"]) + + // Arrays become []interface{} by JSON unmarshalling. + stopSeq, ok := modifiedReq["stop_sequences"].([]any) + require.True(t, ok) + require.Len(t, stopSeq, 2) + require.Equal(t, "Human:", stopSeq[0]) + require.Equal(t, "Assistant:", stopSeq[1]) + + // Boolean false values are now included in the map. + require.Equal(t, false, modifiedReq["stream"]) + + // String values are preserved. + require.Equal(t, "You are a helpful assistant", modifiedReq["system"]) + + // Complex objects should be preserved as maps. + require.NotNil(t, modifiedReq["tools"]) + require.NotNil(t, modifiedReq["tool_choice"]) + require.NotNil(t, modifiedReq["metadata"]) + + // Verify model field is removed from body (it's in the path instead). + _, hasModel := modifiedReq["model"] + require.False(t, hasModel, "model field should be removed from request body") + + // Verify anthropic_version is added for AWS Bedrock. 
+ version, hasVersion := modifiedReq["anthropic_version"] + require.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") + require.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + tests := []struct { + name string + headers map[string]string + }{ + { + name: "empty headers", + headers: map[string]string{}, + }, + { + name: "various headers", + headers: map[string]string{ + "content-type": "application/json", + "authorization": "Bearer token", + "custom-header": "value", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + headerMutation, err := translator.ResponseHeaders(tt.headers) + require.NoError(t, err) + assert.Nil(t, headerMutation, "ResponseHeaders should return nil for passthrough") + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_NonStreaming(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Create a sample Anthropic response. + respBody := anthropic.Message{ + ID: "msg_test123", + Type: "message", + Role: "assistant", + Content: []anthropic.ContentBlockUnion{ + {Type: "text", Text: "Hello! How can I help you today?"}, + }, + Model: "claude-3-sonnet-20240229", + Usage: anthropic.Usage{ + InputTokens: 25, + OutputTokens: 15, + }, + } + + bodyBytes, err := json.Marshal(respBody) + require.NoError(t, err) + + bodyReader := bytes.NewReader(bodyBytes) + respHeaders := map[string]string{"content-type": "application/json"} + + headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + expectedUsage := LLMTokenUsage{ + InputTokens: 25, + OutputTokens: 15, + TotalTokens: 40, + } + assert.Equal(t, expectedUsage, tokenUsage) + + // responseModel should be populated from requestModel set during RequestBody. + assert.Empty(t, responseModel) + + // Verify body is passed through - compare key fields. + var outputResp anthropic.Message + err = json.Unmarshal(bodyMutation.GetBody(), &outputResp) + require.NoError(t, err) + assert.Equal(t, respBody.ID, outputResp.ID) + assert.Equal(t, respBody.Type, outputResp.Type) + assert.Equal(t, respBody.Role, outputResp.Role) + assert.Equal(t, respBody.Model, outputResp.Model) + assert.Equal(t, respBody.Usage.InputTokens, outputResp.Usage.InputTokens) + assert.Equal(t, respBody.Usage.OutputTokens, outputResp.Usage.OutputTokens) +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Test response with cached input tokens. 
+ respBody := anthropic.Message{ + ID: "msg_cached", + Type: "message", + Role: "assistant", + Content: []anthropic.ContentBlockUnion{{Type: "text", Text: "Response with cache"}}, + Model: "claude-3-sonnet-20240229", + Usage: anthropic.Usage{ + InputTokens: 50, + OutputTokens: 20, + CacheReadInputTokens: 30, + CacheCreationInputTokens: 10, + }, + } + + bodyBytes, err := json.Marshal(respBody) + require.NoError(t, err) + + bodyReader := bytes.NewReader(bodyBytes) + respHeaders := map[string]string{"content-type": "application/json"} + + _, _, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) + require.NoError(t, err) + + expectedUsage := LLMTokenUsage{ + InputTokens: 50, + OutputTokens: 20, + TotalTokens: 70, + CachedInputTokens: 30, + } + assert.Equal(t, expectedUsage, tokenUsage) +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_StreamingTokenUsage(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + tests := []struct { + name string + chunk string + endOfStream bool + expectedUsage LLMTokenUsage + expectedBody string + }{ + { + name: "message_start chunk with token usage", + chunk: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 25, + OutputTokens: 0, + TotalTokens: 25, + }, + expectedBody: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", + }, + { + name: "content_block_delta chunk without usage", + chunk: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 0, + TotalTokens: 0, + }, + expectedBody: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", + }, + { + name: "message_delta chunk with output tokens", + chunk: "event: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 84, + TotalTokens: 84, + }, + expectedBody: "event: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", + }, + { + name: "message_stop chunk without usage", + chunk: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 0, + TotalTokens: 0, + }, + expectedBody: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + bodyReader := bytes.NewReader([]byte(tt.chunk)) + respHeaders := map[string]string{"content-type": "text/event-stream"} + + headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, tt.endOfStream) + + require.NoError(t, err) + require.Nil(t, headerMutation) + require.NotNil(t, bodyMutation) 
+ require.Equal(t, tt.expectedBody, string(bodyMutation.GetBody())) + require.Equal(t, tt.expectedUsage, tokenUsage) + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_ReadError(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Create a reader that will fail. + errorReader := &awsAnthropicErrorReader{} + respHeaders := map[string]string{"content-type": "application/json"} + + _, _, _, _, err := translator.ResponseBody(respHeaders, errorReader, true) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to read response body") +} + +// awsAnthropicErrorReader implements io.Reader but always returns an error. +type awsAnthropicErrorReader struct{} + +func (e *awsAnthropicErrorReader) Read(_ []byte) (n int, err error) { + return 0, io.ErrUnexpectedEOF +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_InvalidJSON(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + invalidJSON := []byte(`{invalid json}`) + bodyReader := bytes.NewReader(invalidJSON) + respHeaders := map[string]string{"content-type": "application/json"} + + headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) + + // Should not error - just pass through invalid JSON. + require.NoError(t, err) + require.NotNil(t, bodyMutation) + // headerMutation is set with content-length for non-streaming responses + if headerMutation != nil { + assert.NotEmpty(t, headerMutation.SetHeaders) + } + + //nolint:testifylint // testifylint want to use JSONEq which is not possible + assert.Equal(t, invalidJSON, bodyMutation.GetBody()) + + // Token usage should be zero for invalid JSON. + expectedUsage := LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 0, + TotalTokens: 0, + } + assert.Equal(t, expectedUsage, tokenUsage) +} + +func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { + tests := []struct { + name string + modelID string + expectedPath string + }{ + { + name: "simple model ID with colon", + modelID: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + }, + { + name: "full ARN with multiple special characters", + modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", + }, + { + name: "global model prefix", + modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + originalReq := &anthropicschema.MessagesRequest{ + "model": tt.modelID, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + + headerMutation, _, err := translator.RequestBody(nil, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + + pathHeader := headerMutation.SetHeaders[0] + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + }) + } +} diff --git a/internal/filterapi/filterconfig.go b/internal/filterapi/filterconfig.go index b9eb226216..80b013a08b 100644 --- a/internal/filterapi/filterconfig.go +++ 
b/internal/filterapi/filterconfig.go @@ -117,6 +117,9 @@ const ( APISchemaGCPAnthropic APISchemaName = "GCPAnthropic" // APISchemaAnthropic represents the standard Anthropic API schema. APISchemaAnthropic APISchemaName = "Anthropic" + // APISchemaAWSAnthropic represents the AWS Bedrock Anthropic API schema. + // Used for Claude models hosted on AWS Bedrock using the native Anthropic Messages API. + APISchemaAWSAnthropic APISchemaName = "AWSAnthropic" ) // RouteRuleName is the name of the route rule. diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml index 6c2cf79190..f46b75d026 100644 --- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml +++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml @@ -235,6 +235,7 @@ spec: - GCPVertexAI - GCPAnthropic - Anthropic + - AWSAnthropic type: string version: description: |- diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml index 6c2cf79190..f46b75d026 100644 --- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml @@ -235,6 +235,7 @@ spec: - GCPVertexAI - GCPAnthropic - Anthropic + - AWSAnthropic type: string version: description: |- diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx index 6a11fe730e..ead38a39b9 100644 --- a/site/docs/api/api.mdx +++ b/site/docs/api/api.mdx @@ -757,6 +757,11 @@ APISchema defines the API schema. type="enum" required="false" description="APISchemaAnthropic is the native Anthropic API schema.
https://docs.claude.com/en/home
" +/> #### AWSCredentialsFile diff --git a/site/docs/capabilities/llm-integrations/supported-endpoints.md b/site/docs/capabilities/llm-integrations/supported-endpoints.md index fa1c4ac8ad..31deb9a0a0 100644 --- a/site/docs/capabilities/llm-integrations/supported-endpoints.md +++ b/site/docs/capabilities/llm-integrations/supported-endpoints.md @@ -80,6 +80,7 @@ curl -H "Content-Type: application/json" \ - Anthropic - GCP Anthropic +- AWS Anthropic **Example:** @@ -212,6 +213,7 @@ The following table summarizes which providers support which endpoints: | [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | Via OpenAI-compatible API | | [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | Via OpenAI-compatible API | | [Anthropic on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ✅ | Native Anthropic API | | [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | Via OpenAI-compatible API | | [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | diff --git a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md new file mode 100644 index 0000000000..2a63dd7b27 --- /dev/null +++ b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md @@ -0,0 +1,344 @@ +--- +id: aws-bedrock-anthropic +title: Connect AWS Bedrock (Anthropic Native API) +sidebar_position: 4 +--- + +# Connect AWS Bedrock with Anthropic Native API + +This guide shows you how to configure Envoy AI Gateway to use Anthropic models on AWS Bedrock with the **native Anthropic Messages API format**. This allows you to use the `/anthropic/v1/messages` endpoint to call Claude models hosted on AWS Bedrock. + +> [!NOTE] +> If you want to use AWS Bedrock models with the OpenAI-compatible format (`/v1/chat/completions`), see the [AWS Bedrock guide](./aws-bedrock.md) instead. + +## Prerequisites + +Before you begin, you'll need: + +- AWS credentials with access to Bedrock +- Basic setup completed from the [Basic Usage](../basic-usage.md) guide +- Basic configuration removed as described in the [Advanced Configuration](./index.md) overview +- Model access enabled for Anthropic Claude models in your AWS region + +## AWS Credentials Setup + +Ensure you have: + +1. An AWS account with Bedrock access enabled +2. AWS credentials with permissions to: + - `bedrock:InvokeModel` + - `bedrock:ListFoundationModels` +3. Your AWS access key ID and secret access key +4. Enabled model access to Anthropic Claude models in your desired AWS region (e.g., `us-east-1`) + - Go to the AWS Bedrock console and request access to Anthropic models + - If you want to use a different AWS region, you must update all instances of `us-east-1` with the desired region in the configuration file + +> [!TIP] +> Consider using AWS IAM roles and limited-scope credentials for production environments. For EKS clusters, AWS IAM Roles for Service Accounts (IRSA) is recommended. + +## Why Use the Native Anthropic API? 
+ +The native Anthropic API provides several advantages when working with Claude models: + +- **Full feature support**: Access all Anthropic-specific features like extended thinking, prompt caching, and tool use +- **Consistent API**: Use the same API format you would with Anthropic's direct API +- **Better compatibility**: Avoid potential translation issues between OpenAI and Anthropic formats +- **Feature parity**: Get immediate access to new Anthropic features as they're released + +## Configuration Steps + +> [!IMPORTANT] +> Ensure you have followed the prerequisite steps in [Connect Providers](../connect-providers/) before proceeding. + +### 1. Download Configuration Template + +```shell +curl -O https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/aws-bedrock-anthropic.yaml +``` + +### 2. Configure AWS Credentials + +Edit the `aws-bedrock-anthropic.yaml` file to replace these placeholder values: + +- `AWS_ACCESS_KEY_ID`: Your AWS access key ID +- `AWS_SECRET_ACCESS_KEY`: Your AWS secret access key +- Update the `region` field if you're using a region other than `us-east-1` +- Update the model ID in the `value` field if you want to use a different Claude model + +> [!CAUTION] +> Make sure to keep your AWS credentials secure and never commit them to version control. The credentials will be stored in Kubernetes secrets. + +### 3. Apply Configuration + +Apply the updated configuration and wait for the Gateway pod to be ready. If you already have a Gateway running, the secret credential update will be picked up automatically in a few seconds. + +```shell +kubectl apply -f aws-bedrock-anthropic.yaml + +kubectl wait pods --timeout=2m \ + -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ + -n envoy-gateway-system \ + --for=condition=Ready +``` + +### 4. Test the Configuration + +You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. See the [Basic Usage](../basic-usage.md) page for instructions. + +Test your configuration using the native Anthropic Messages API format: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 100 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +Expected output: + +```json +{ + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "The capital of France is Paris." + } + ], + "model": "claude-3-5-sonnet-20241022", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 13, + "output_tokens": 8 + } +} +``` + +### 5. Test Streaming + +The native Anthropic API also supports streaming responses: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 5." + } + ], + "max_tokens": 100, + "stream": true + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Available Anthropic Models on AWS Bedrock + +AWS Bedrock supports several Claude model versions. 
Here are some commonly used model IDs: + +| Model Name | AWS Bedrock Model ID | +| ---------------------------- | ----------------------------------------- | +| Claude 3.5 Sonnet (Oct 2024) | anthropic.claude-3-5-sonnet-20241022-v2:0 | +| Claude 3.5 Sonnet (Jun 2024) | anthropic.claude-3-5-sonnet-20240620-v1:0 | +| Claude 3 Opus | anthropic.claude-3-opus-20240229-v1:0 | +| Claude 3 Sonnet | anthropic.claude-3-sonnet-20240229-v1:0 | +| Claude 3 Haiku | anthropic.claude-3-haiku-20240307-v1:0 | + +> [!NOTE] +> Model availability varies by AWS region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for the complete list of supported models in your region. + +## Configuring More Models + +To use additional models, add more `AIGatewayRoute` rules to the configuration file. Each rule should specify a different model ID: + +```yaml +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + # Claude 3.5 Sonnet (Oct 2024) + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic + # Claude 3 Opus + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-opus-20240229-v1:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +``` + +## Advanced Features + +### Using Anthropic-Specific Features + +Since this configuration uses the native Anthropic API, you have full access to Anthropic-specific features: + +#### Extended Thinking + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" + } + ], + "max_tokens": 1000, + "thinking": { + "type": "enabled", + "budget_tokens": 5000 + } + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +#### Prompt Caching + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "system": [ + { + "type": "text", + "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", + "cache_control": {"type": "ephemeral"} + } + ], + "messages": [ + { + "role": "user", + "content": "Write a function to calculate fibonacci numbers." + } + ], + "max_tokens": 500 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +#### Tool Use (Function Calling) + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + } + ], + "max_tokens": 500, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } + ] + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Troubleshooting + +If you encounter issues: + +1. 
**Verify your AWS credentials are correct and active** + + ```shell + # Check if credentials are properly configured + kubectl get secret envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials -n default -o yaml + ``` + +2. **Check pod status** + + ```shell + kubectl get pods -n envoy-gateway-system + ``` + +3. **View controller logs** + + ```shell + kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller + ``` + +4. **View gateway pod logs** + + ```shell + kubectl logs -n envoy-gateway-system -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic + ``` + +### Common Errors + +| Error Code | Issue | Solution | +| ---------- | ----------------------------------------------- | -------------------------------------------------------------------- | +| 401/403 | Invalid credentials or insufficient permissions | Verify AWS credentials and ensure Bedrock permissions are granted | +| 404 | Model not found or not available in region | Check model ID and ensure model access is enabled in your AWS region | +| 429 | Rate limit exceeded | Implement rate limiting or request quota increase from AWS | +| 400 | Invalid request format | Verify request body matches Anthropic API format | +| 500 | AWS Bedrock internal error | Check AWS Bedrock service status and retry after a short delay | + +## Security Considerations + +When deploying in production: + +1. **Use IAM Roles for Service Accounts (IRSA)** in EKS instead of static credentials +2. **Implement request rate limiting** to control costs and prevent abuse +3. **Enable audit logging** to track API usage and detect anomalies +4. **Use least-privilege IAM policies** that only grant necessary permissions +5. **Rotate credentials regularly** if using static access keys +6. **Monitor token usage and costs** using the gateway's metrics + +## What's Next + +Now that you've connected AWS Bedrock with the native Anthropic API, explore these capabilities: + +- **[Usage-Based Rate Limiting](../../capabilities/traffic/usage-based-ratelimiting.md)** - Configure token-based rate limiting and cost controls +- **[Provider Fallback](../../capabilities/traffic/provider-fallback.md)** - Set up automatic failover between AWS Bedrock and other Anthropic providers +- **[Metrics and Monitoring](../../capabilities/observability/metrics.md)** - Monitor usage, costs, and performance metrics +- **[Model Virtualization](../../capabilities/traffic/model-virtualization.md)** - Create virtual model names that route to different backends + +## References + +- [AWS Bedrock Anthropic Models Documentation](https://aws.amazon.com/bedrock/anthropic/) +- [Anthropic API Reference](https://docs.anthropic.com/en/api) +- [AWS Bedrock Model IDs](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) +- [AIGatewayRoute API Reference](../../api/api.mdx#aigatewayrouterule) diff --git a/site/docs/getting-started/connect-providers/index.md b/site/docs/getting-started/connect-providers/index.md index 2137c9ad63..fb23dc4abd 100644 --- a/site/docs/getting-started/connect-providers/index.md +++ b/site/docs/getting-started/connect-providers/index.md @@ -44,3 +44,4 @@ Choose your provider to get started: - [Connect OpenAI](./openai.md) - [Connect AWS Bedrock](./aws-bedrock.md) - [Connect Azure OpenAI](./azure-openai.md) +- [Connect GCP VertexAI](./gcp-vertexai.md) diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 3a7b9442fe..64c0ae228e 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -36,6 
+36,7 @@ const ( var ( openAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} awsBedrockSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSBedrock} + awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic} azureOpenAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAzureOpenAI, Version: "2025-01-01-preview"} gcpVertexAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPVertexAI} gcpAnthropicAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPAnthropic, Version: "vertex-2023-10-16"} diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index fc51add5a4..843816b01b 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -50,6 +50,10 @@ func TestWithRealProviders(t *testing.T) { CredentialFileLiteral: cc.AWSFileLiteral, Region: "us-east-1", }}}, + {Name: "aws-bedrock-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + CredentialFileLiteral: cc.AWSFileLiteral, + Region: "us-east-1", + }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, From 70183bcde0b7651b569dccb27cbddf004efa2ad8 Mon Sep 17 00:00:00 2001 From: Sebastian Poxhofer Date: Thu, 23 Oct 2025 17:38:47 +0200 Subject: [PATCH 02/15] Remove some doc blocks Signed-off-by: Sebastian Poxhofer --- .../aws-bedrock-anthropic.md | 41 +------------------ 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md index 2a63dd7b27..cd5e031304 100644 --- a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md +++ b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md @@ -28,6 +28,7 @@ Ensure you have: 2. AWS credentials with permissions to: - `bedrock:InvokeModel` - `bedrock:ListFoundationModels` + - `aws-marketplace:ViewSubscriptions` 3. Your AWS access key ID and secret access key 4. Enabled model access to Anthropic Claude models in your desired AWS region (e.g., `us-east-1`) - Go to the AWS Bedrock console and request access to Anthropic models @@ -144,20 +145,6 @@ curl -H "Content-Type: application/json" \ $GATEWAY_URL/anthropic/v1/messages ``` -## Available Anthropic Models on AWS Bedrock - -AWS Bedrock supports several Claude model versions. Here are some commonly used model IDs: - -| Model Name | AWS Bedrock Model ID | -| ---------------------------- | ----------------------------------------- | -| Claude 3.5 Sonnet (Oct 2024) | anthropic.claude-3-5-sonnet-20241022-v2:0 | -| Claude 3.5 Sonnet (Jun 2024) | anthropic.claude-3-5-sonnet-20240620-v1:0 | -| Claude 3 Opus | anthropic.claude-3-opus-20240229-v1:0 | -| Claude 3 Sonnet | anthropic.claude-3-sonnet-20240229-v1:0 | -| Claude 3 Haiku | anthropic.claude-3-haiku-20240307-v1:0 | - -> [!NOTE] -> Model availability varies by AWS region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for the complete list of supported models in your region. 
## Configuring More Models @@ -316,29 +303,3 @@ If you encounter issues: | 400 | Invalid request format | Verify request body matches Anthropic API format | | 500 | AWS Bedrock internal error | Check AWS Bedrock service status and retry after a short delay | -## Security Considerations - -When deploying in production: - -1. **Use IAM Roles for Service Accounts (IRSA)** in EKS instead of static credentials -2. **Implement request rate limiting** to control costs and prevent abuse -3. **Enable audit logging** to track API usage and detect anomalies -4. **Use least-privilege IAM policies** that only grant necessary permissions -5. **Rotate credentials regularly** if using static access keys -6. **Monitor token usage and costs** using the gateway's metrics - -## What's Next - -Now that you've connected AWS Bedrock with the native Anthropic API, explore these capabilities: - -- **[Usage-Based Rate Limiting](../../capabilities/traffic/usage-based-ratelimiting.md)** - Configure token-based rate limiting and cost controls -- **[Provider Fallback](../../capabilities/traffic/provider-fallback.md)** - Set up automatic failover between AWS Bedrock and other Anthropic providers -- **[Metrics and Monitoring](../../capabilities/observability/metrics.md)** - Monitor usage, costs, and performance metrics -- **[Model Virtualization](../../capabilities/traffic/model-virtualization.md)** - Create virtual model names that route to different backends - -## References - -- [AWS Bedrock Anthropic Models Documentation](https://aws.amazon.com/bedrock/anthropic/) -- [Anthropic API Reference](https://docs.anthropic.com/en/api) -- [AWS Bedrock Model IDs](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) -- [AIGatewayRoute API Reference](../../api/api.mdx#aigatewayrouterule) From 060f83e3f2afd67e0ee6975e66ec67d8879aecbc Mon Sep 17 00:00:00 2001 From: secustor Date: Thu, 23 Oct 2025 22:12:15 +0200 Subject: [PATCH 03/15] docs: merge aws and aws anthropic docs Signed-off-by: secustor --- examples/basic/aws-bedrock-anthropic.yaml | 93 ------ examples/basic/aws.yaml | 37 +++ .../aws-bedrock-anthropic.md | 305 ------------------ .../connect-providers/aws-bedrock.md | 224 ++++++++++++- 4 files changed, 253 insertions(+), 406 deletions(-) delete mode 100644 examples/basic/aws-bedrock-anthropic.yaml delete mode 100644 site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md diff --git a/examples/basic/aws-bedrock-anthropic.yaml b/examples/basic/aws-bedrock-anthropic.yaml deleted file mode 100644 index b2db5df483..0000000000 --- a/examples/basic/aws-bedrock-anthropic.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright Envoy AI Gateway Authors -# SPDX-License-Identifier: Apache-2.0 -# The full text of the Apache license is available in the LICENSE file at -# the root of the repo. 
- -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: AIGatewayRoute -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - parentRefs: - - name: envoy-ai-gateway-basic - kind: Gateway - group: gateway.networking.k8s.io - rules: - - matches: - - headers: - - type: Exact - name: x-ai-eg-model - value: anthropic.claude-3-5-sonnet-20241022-v2:0 - backendRefs: - - name: envoy-ai-gateway-basic-aws-bedrock-anthropic ---- -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: AIServiceBackend -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - schema: - name: AWSAnthropic - version: bedrock-2023-05-31 - backendRef: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - kind: Backend - group: gateway.envoyproxy.io ---- -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: BackendSecurityPolicy -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials - namespace: default -spec: - targetRefs: - - group: aigateway.envoyproxy.io - kind: AIServiceBackend - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - type: AWSCredentials - awsCredentials: - region: us-east-1 - credentialsFile: - secretRef: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials ---- -apiVersion: gateway.envoyproxy.io/v1alpha1 -kind: Backend -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - endpoints: - - fqdn: - hostname: bedrock-runtime.us-east-1.amazonaws.com - port: 443 ---- -apiVersion: gateway.networking.k8s.io/v1alpha3 -kind: BackendTLSPolicy -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-tls - namespace: default -spec: - targetRefs: - - group: "gateway.envoyproxy.io" - kind: Backend - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - validation: - wellKnownCACertificates: "System" - hostname: bedrock-runtime.us-east-1.amazonaws.com ---- -apiVersion: v1 -kind: Secret -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials - namespace: default -type: Opaque -stringData: - # Replace this with your AWS credentials. - # You can also use AWS IAM roles for service accounts (IRSA) in EKS. 
- credentials: | - [default] - aws_access_key_id = AWS_ACCESS_KEY_ID - aws_secret_access_key = AWS_SECRET_ACCESS_KEY diff --git a/examples/basic/aws.yaml b/examples/basic/aws.yaml index 7bc37a4b2b..784972326c 100644 --- a/examples/basic/aws.yaml +++ b/examples/basic/aws.yaml @@ -23,6 +23,25 @@ spec: - name: envoy-ai-gateway-basic-aws --- apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend metadata: name: envoy-ai-gateway-basic-aws @@ -36,6 +55,20 @@ spec: group: gateway.envoyproxy.io --- apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIServiceBackend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + schema: + name: AWSAnthropic + version: bedrock-2023-05-31 + backendRef: + name: envoy-ai-gateway-basic-aws + kind: Backend + group: gateway.envoyproxy.io +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: BackendSecurityPolicy metadata: name: envoy-ai-gateway-basic-aws-credentials @@ -45,6 +78,9 @@ spec: - group: aigateway.envoyproxy.io kind: AIServiceBackend name: envoy-ai-gateway-basic-aws + - group: aigateway.envoyproxy.io + kind: AIServiceBackend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic type: AWSCredentials awsCredentials: region: us-east-1 @@ -85,6 +121,7 @@ metadata: type: Opaque stringData: # Replace this with your AWS credentials. + # You can also use AWS IAM roles for service accounts (IRSA) in EKS. credentials: | [default] aws_access_key_id = AWS_ACCESS_KEY_ID diff --git a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md deleted file mode 100644 index cd5e031304..0000000000 --- a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md +++ /dev/null @@ -1,305 +0,0 @@ ---- -id: aws-bedrock-anthropic -title: Connect AWS Bedrock (Anthropic Native API) -sidebar_position: 4 ---- - -# Connect AWS Bedrock with Anthropic Native API - -This guide shows you how to configure Envoy AI Gateway to use Anthropic models on AWS Bedrock with the **native Anthropic Messages API format**. This allows you to use the `/anthropic/v1/messages` endpoint to call Claude models hosted on AWS Bedrock. - -> [!NOTE] -> If you want to use AWS Bedrock models with the OpenAI-compatible format (`/v1/chat/completions`), see the [AWS Bedrock guide](./aws-bedrock.md) instead. - -## Prerequisites - -Before you begin, you'll need: - -- AWS credentials with access to Bedrock -- Basic setup completed from the [Basic Usage](../basic-usage.md) guide -- Basic configuration removed as described in the [Advanced Configuration](./index.md) overview -- Model access enabled for Anthropic Claude models in your AWS region - -## AWS Credentials Setup - -Ensure you have: - -1. An AWS account with Bedrock access enabled -2. AWS credentials with permissions to: - - `bedrock:InvokeModel` - - `bedrock:ListFoundationModels` - - `aws-marketplace:ViewSubscriptions` -3. Your AWS access key ID and secret access key -4. 
Enabled model access to Anthropic Claude models in your desired AWS region (e.g., `us-east-1`) - - Go to the AWS Bedrock console and request access to Anthropic models - - If you want to use a different AWS region, you must update all instances of `us-east-1` with the desired region in the configuration file - -> [!TIP] -> Consider using AWS IAM roles and limited-scope credentials for production environments. For EKS clusters, AWS IAM Roles for Service Accounts (IRSA) is recommended. - -## Why Use the Native Anthropic API? - -The native Anthropic API provides several advantages when working with Claude models: - -- **Full feature support**: Access all Anthropic-specific features like extended thinking, prompt caching, and tool use -- **Consistent API**: Use the same API format you would with Anthropic's direct API -- **Better compatibility**: Avoid potential translation issues between OpenAI and Anthropic formats -- **Feature parity**: Get immediate access to new Anthropic features as they're released - -## Configuration Steps - -> [!IMPORTANT] -> Ensure you have followed the prerequisite steps in [Connect Providers](../connect-providers/) before proceeding. - -### 1. Download Configuration Template - -```shell -curl -O https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/aws-bedrock-anthropic.yaml -``` - -### 2. Configure AWS Credentials - -Edit the `aws-bedrock-anthropic.yaml` file to replace these placeholder values: - -- `AWS_ACCESS_KEY_ID`: Your AWS access key ID -- `AWS_SECRET_ACCESS_KEY`: Your AWS secret access key -- Update the `region` field if you're using a region other than `us-east-1` -- Update the model ID in the `value` field if you want to use a different Claude model - -> [!CAUTION] -> Make sure to keep your AWS credentials secure and never commit them to version control. The credentials will be stored in Kubernetes secrets. - -### 3. Apply Configuration - -Apply the updated configuration and wait for the Gateway pod to be ready. If you already have a Gateway running, the secret credential update will be picked up automatically in a few seconds. - -```shell -kubectl apply -f aws-bedrock-anthropic.yaml - -kubectl wait pods --timeout=2m \ - -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ - -n envoy-gateway-system \ - --for=condition=Ready -``` - -### 4. Test the Configuration - -You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. See the [Basic Usage](../basic-usage.md) page for instructions. - -Test your configuration using the native Anthropic Messages API format: - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 100 - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -Expected output: - -```json -{ - "id": "msg_01XFDUDYJgAACzvnptvVoYEL", - "type": "message", - "role": "assistant", - "content": [ - { - "type": "text", - "text": "The capital of France is Paris." - } - ], - "model": "claude-3-5-sonnet-20241022", - "stop_reason": "end_turn", - "usage": { - "input_tokens": 13, - "output_tokens": 8 - } -} -``` - -### 5. Test Streaming - -The native Anthropic API also supports streaming responses: - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "Count from 1 to 5." 
- } - ], - "max_tokens": 100, - "stream": true - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - - -## Configuring More Models - -To use additional models, add more `AIGatewayRoute` rules to the configuration file. Each rule should specify a different model ID: - -```yaml -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: AIGatewayRoute -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - parentRefs: - - name: envoy-ai-gateway-basic - kind: Gateway - group: gateway.networking.k8s.io - rules: - # Claude 3.5 Sonnet (Oct 2024) - - matches: - - headers: - - type: Exact - name: x-ai-eg-model - value: anthropic.claude-3-5-sonnet-20241022-v2:0 - backendRefs: - - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - # Claude 3 Opus - - matches: - - headers: - - type: Exact - name: x-ai-eg-model - value: anthropic.claude-3-opus-20240229-v1:0 - backendRefs: - - name: envoy-ai-gateway-basic-aws-bedrock-anthropic -``` - -## Advanced Features - -### Using Anthropic-Specific Features - -Since this configuration uses the native Anthropic API, you have full access to Anthropic-specific features: - -#### Extended Thinking - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" - } - ], - "max_tokens": 1000, - "thinking": { - "type": "enabled", - "budget_tokens": 5000 - } - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -#### Prompt Caching - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "system": [ - { - "type": "text", - "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", - "cache_control": {"type": "ephemeral"} - } - ], - "messages": [ - { - "role": "user", - "content": "Write a function to calculate fibonacci numbers." - } - ], - "max_tokens": 500 - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -#### Tool Use (Function Calling) - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "What is the weather in San Francisco?" - } - ], - "max_tokens": 500, - "tools": [ - { - "name": "get_weather", - "description": "Get the current weather in a given location", - "input_schema": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA" - } - }, - "required": ["location"] - } - } - ] - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -## Troubleshooting - -If you encounter issues: - -1. **Verify your AWS credentials are correct and active** - - ```shell - # Check if credentials are properly configured - kubectl get secret envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials -n default -o yaml - ``` - -2. **Check pod status** - - ```shell - kubectl get pods -n envoy-gateway-system - ``` - -3. **View controller logs** - - ```shell - kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller - ``` - -4. 
**View gateway pod logs**
-
-   ```shell
-   kubectl logs -n envoy-gateway-system -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic
-   ```
-
-### Common Errors
-
-| Error Code | Issue                                           | Solution                                                              |
-| ---------- | ----------------------------------------------- | -------------------------------------------------------------------- |
-| 401/403    | Invalid credentials or insufficient permissions | Verify AWS credentials and ensure Bedrock permissions are granted    |
-| 404        | Model not found or not available in region      | Check model ID and ensure model access is enabled in your AWS region |
-| 429        | Rate limit exceeded                             | Implement rate limiting or request quota increase from AWS           |
-| 400        | Invalid request format                          | Verify request body matches Anthropic API format                     |
-| 500        | AWS Bedrock internal error                      | Check AWS Bedrock service status and retry after a short delay       |
-
diff --git a/site/docs/getting-started/connect-providers/aws-bedrock.md b/site/docs/getting-started/connect-providers/aws-bedrock.md
index 680af63a1d..91ed0e4389 100644
--- a/site/docs/getting-started/connect-providers/aws-bedrock.md
+++ b/site/docs/getting-started/connect-providers/aws-bedrock.md
@@ -6,7 +6,7 @@ sidebar_position: 3
 
 # Connect AWS Bedrock
 
-This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models.
+This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models, including Llama and Anthropic Claude.
 
 ## Prerequisites
 
@@ -24,6 +24,7 @@ Ensure you have:
 2. AWS credentials with permissions to:
    - `bedrock:InvokeModel`
    - `bedrock:ListFoundationModels`
+   - `aws-marketplace:ViewSubscriptions` (for Anthropic models)
 3. Your AWS access key ID and secret access key
 4. Enabled model access to "Llama 3.2 1B Instruct" in the `us-east-1` region
    - If you want to use a different AWS region, you must update all instances of the string
@@ -76,6 +77,8 @@ kubectl wait pods --timeout=2m \
 
 You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. See the [Basic Usage](../basic-usage.md) page for instructions.
 
+To access a Llama model with the chat completions endpoint:
+
 ```shell
 curl -H "Content-Type: application/json" \
   -d '{
@@ -90,23 +93,119 @@ curl -H "Content-Type: application/json" \
     $GATEWAY_URL/v1/chat/completions
 ```
 
+To access an Anthropic model with the chat completions endpoint:
+
+```shell
+curl -H "Content-Type: application/json" \
+  -d '{
+        "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "messages": [
+          {
+            "role": "user",
+            "content": "What is the capital of France?"
+          }
+        ],
+        "max_completion_tokens": 100
+      }' \
+    $GATEWAY_URL/v1/chat/completions
+```
+
+Expected output:
+
+```json
+{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "The capital of France is Paris.",
+        "role": "assistant"
+      }
+    }
+  ],
+  "object": "chat.completion",
+  "usage": { "completion_tokens": 8, "prompt_tokens": 13, "total_tokens": 21 }
+}
+```
+
+You can also access an Anthropic model with the native Anthropic Messages endpoint:
+
+```shell
+curl -H "Content-Type: application/json" \
+  -d '{
+        "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "messages": [
+          {
+            "role": "user",
+            "content": "What is the capital of France?"
+ } + ], + "max_tokens": 100 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +Expected output: + +```json +{ + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "The capital of France is Paris." + } + ], + "model": "claude-3-5-sonnet-20241022", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 13, + "output_tokens": 8 + } +} +``` + ## Troubleshooting If you encounter issues: -1. Verify your AWS credentials are correct and active -2. Check pod status: +1. **Verify your AWS credentials are correct and active** + + ```shell + # Check if credentials are properly configured + kubectl get secret -n default -o yaml + ``` + +2. **Check pod status** + ```shell kubectl get pods -n envoy-gateway-system ``` -3. View controller logs: + +3. **View controller logs** + ```shell kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller ``` -4. Common errors: - - 401/403: Invalid credentials or insufficient permissions - - 404: Model not found or not available in region - - 429: Rate limit exceeded + +4. **View gateway pod logs** + + ```shell + kubectl logs -n envoy-gateway-system -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic + ``` + +### Common Errors + +| Error Code | Issue | Solution | +| ---------- | ----------------------------------------------- | -------------------------------------------------------------------- | +| 401/403 | Invalid credentials or insufficient permissions | Verify AWS credentials and ensure Bedrock permissions are granted | +| 404 | Model not found or not available in region | Check model ID and ensure model access is enabled in your AWS region | +| 429 | Rate limit exceeded | Implement rate limiting or request quota increase from AWS | +| 400 | Invalid request format | Verify request body matches the expected API format | +| 500 | AWS Bedrock internal error | Check AWS Bedrock service status and retry after a short delay | ## Configuring More Models @@ -133,6 +232,115 @@ spec: - name: envoy-ai-gateway-basic-aws ``` +## Using Anthropic Native API + +When using Anthropic models on AWS Bedrock, you have two options: + +1. **OpenAI-compatible format** (`/v1/chat/completions`) - Works with most models but may not support all Anthropic-specific features +2. **Native Anthropic API** (`/anthropic/v1/messages`) - Provides full access to Anthropic-specific features (only for Anthropic models) + +### Streaming with Native Anthropic API + +The native Anthropic API also supports streaming responses: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 5." + } + ], + "max_tokens": 100, + "stream": true + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Advanced Features with Anthropic Models + +Since the gateway supports the native Anthropic API, you have full access to Anthropic-specific features: + +### Extended Thinking + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" 
+ } + ], + "max_tokens": 1000, + "thinking": { + "type": "enabled", + "budget_tokens": 5000 + } + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +### Prompt Caching + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "system": [ + { + "type": "text", + "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", + "cache_control": {"type": "ephemeral"} + } + ], + "messages": [ + { + "role": "user", + "content": "Write a function to calculate fibonacci numbers." + } + ], + "max_tokens": 500 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +### Tool Use (Function Calling) + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + } + ], + "max_tokens": 500, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } + ] + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + [AIGatewayRouteRule]: ../../api/api.mdx#aigatewayrouterule [model ID]: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html [Claude 3 Sonnet]: https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table From 460c72e67b330a4f408113755b6641f209089f8f Mon Sep 17 00:00:00 2001 From: secustor Date: Thu, 23 Oct 2025 23:33:15 +0200 Subject: [PATCH 04/15] refactor: extract ResponseHandler and applyModelNameOverride Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 94 +----------- .../translator/anthropic_gcpanthropic.go | 94 +----------- .../extproc/translator/anthropic_helper.go | 143 ++++++++++++++++++ 3 files changed, 155 insertions(+), 176 deletions(-) create mode 100644 internal/extproc/translator/anthropic_helper.go diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 735c9a8119..6968fd731c 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,14 +6,12 @@ package translator import ( - "bytes" "encoding/json" "fmt" "io" "maps" "net/url" - "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -27,14 +25,15 @@ func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToAWSAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, + responseHandler: newAnthropicResponseHandler(), } } type anthropicToAWSAnthropicTranslator struct { - // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel + responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. @@ -50,10 +49,7 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. 
- a.requestModel = modelName - if a.modelNameOverride != "" { - a.requestModel = a.modelNameOverride - } + a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). delete(anthropicReq, "model") @@ -98,87 +94,9 @@ func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. -// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( +// This delegates to the shared anthropicResponseHandler since AWS Bedrock returns the native Anthropic response format. +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - // Read the response body for both streaming and non-streaming. - bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if bytes.HasPrefix(line, dataPrefix) { - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. - if messageData, ok := eventData["message"].(map[string]any); ok { - if usageData, ok := messageData["usage"].(map[string]any); ok { - if inputTokens, ok := usageData["input_tokens"].(float64); ok { - tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec - } - // Some message_start events may include initial output tokens. - if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { - tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec - } - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - - case "message_delta": - if usageData, ok := eventData["usage"].(map[string]any); ok { - if outputTokens, ok := usageData["output_tokens"].(float64); ok { - // Add to existing output tokens (in case message_start had some initial ones). - tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - } - } - } - } - - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, a.requestModel, nil - } - - // Parse the Anthropic response to extract token usage. - var anthropicResp anthropic.Message - if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, a.requestModel, nil - } - - // Extract token usage from the response. 
- tokenUsage = LLMTokenUsage{ - InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec - OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec - TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec - CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec - } - - // Pass through the response body unchanged since both input and output are Anthropic format. - headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil + return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) } diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index 37a5d4a5a3..f5a8bbc799 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -6,13 +6,11 @@ package translator import ( - "bytes" "encoding/json" "fmt" "io" "maps" - "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -25,14 +23,15 @@ func NewAnthropicToGCPAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToGCPAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, + responseHandler: newAnthropicResponseHandler(), } } type anthropicToGCPAnthropicTranslator struct { - // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel + responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to GCP Anthropic translation. @@ -48,10 +47,7 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. - a.requestModel = modelName - if a.modelNameOverride != "" { - a.requestModel = a.modelNameOverride - } + a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) // Remove the model field since GCP doesn't want it in the body. delete(anthropicReq, "model") @@ -90,87 +86,9 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to GCP Anthropic. -// This is essentially a passthrough since both use the same Anthropic response format. -func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( +// This delegates to the shared anthropicResponseHandler since GCP Vertex AI returns the native Anthropic response format. +func (a *anthropicToGCPAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - // Read the response body for both streaming and non-streaming. 
- bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if bytes.HasPrefix(line, dataPrefix) { - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. - if messageData, ok := eventData["message"].(map[string]any); ok { - if usageData, ok := messageData["usage"].(map[string]any); ok { - if inputTokens, ok := usageData["input_tokens"].(float64); ok { - tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec - } - // Some message_start events may include initial output tokens. - if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { - tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec - } - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - - case "message_delta": - if usageData, ok := eventData["usage"].(map[string]any); ok { - if outputTokens, ok := usageData["output_tokens"].(float64); ok { - // Add to existing output tokens (in case message_start had some initial ones). - tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - } - } - } - } - - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, a.requestModel, nil - } - - // Parse the Anthropic response to extract token usage. - var anthropicResp anthropic.Message - if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, a.requestModel, nil - } - - // Extract token usage from the response. - tokenUsage = LLMTokenUsage{ - InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec - OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec - TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec - CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec - } - - // Pass through the response body unchanged since both input and output are Anthropic format. 
- headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil + return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go new file mode 100644 index 0000000000..44cdc08d09 --- /dev/null +++ b/internal/extproc/translator/anthropic_helper.go @@ -0,0 +1,143 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + + "github.com/anthropics/anthropic-sdk-go" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + + "github.com/envoyproxy/ai-gateway/internal/internalapi" +) + +// anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. +// This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. +type anthropicResponseHandler struct{} + +// newAnthropicResponseHandler creates a new stateless response handler. +func newAnthropicResponseHandler() *anthropicResponseHandler { + return &anthropicResponseHandler{} +} + +// ResponseBody handles both streaming and non-streaming Anthropic API responses. +// It extracts token usage information and returns the response unchanged (passthrough). +// The requestModel parameter is used to populate the responseModel return value. +func (h *anthropicResponseHandler) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool, requestModel internalapi.RequestModel) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, +) { + // Read the response body for both streaming and non-streaming. + bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + tokenUsage = h.extractTokenUsageFromSSE(bodyBytes) + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, requestModel, nil + } + + // For non-streaming responses, parse the complete Anthropic response. + tokenUsage, err = h.extractTokenUsageFromResponse(bodyBytes) + if err != nil { + // If we can't parse as Anthropic format, pass through as-is. + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, requestModel, nil + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, requestModel, nil +} + +// extractTokenUsageFromSSE parses SSE (Server-Sent Events) format streaming responses +// to extract token usage information from message_start and message_delta events. 
+func (h *anthropicResponseHandler) extractTokenUsageFromSSE(bodyBytes []byte) LLMTokenUsage { + var tokenUsage LLMTokenUsage + + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if !bytes.HasPrefix(line, dataPrefix) { + continue + } + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + + return tokenUsage +} + +// extractTokenUsageFromResponse parses a complete (non-streaming) Anthropic response +// to extract token usage information. +func (h *anthropicResponseHandler) extractTokenUsageFromResponse(bodyBytes []byte) (LLMTokenUsage, error) { + var anthropicResp anthropic.Message + if err := json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + return LLMTokenUsage{}, err + } + + tokenUsage := LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + return tokenUsage, nil +} + +// applyModelNameOverride applies model name override logic used by AWS and GCP translators. 
+func applyModelNameOverride(originalModel internalapi.RequestModel, override internalapi.ModelNameOverride) internalapi.RequestModel { + if override != "" { + return override + } + return originalModel +} From bf5ded23e9d2db78c56be5d0a8a9871e3281755f Mon Sep 17 00:00:00 2001 From: secustor Date: Fri, 24 Oct 2025 10:11:38 +0200 Subject: [PATCH 05/15] refactor: extract Anthropic request header generation for AWS and GCP Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 17 ++++--------- .../translator/anthropic_gcpanthropic.go | 17 ++++--------- .../translator/anthropic_gcpanthropic_test.go | 3 ++- .../extproc/translator/anthropic_helper.go | 24 +++++++++++++++++++ .../extproc/translator/openai_gcpanthropic.go | 1 - 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 6968fd731c..8e561352bb 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "io" - "maps" "net/url" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -44,22 +43,14 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for AWS Bedrock endpoint from the parsed request. modelName := body.GetModel() - // Work directly with the map since MessagesRequest is already map[string]interface{}. - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). - delete(anthropicReq, "model") - - // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). - // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). - if a.apiVersion == "" { - return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") + // Prepare the request body (removes model field, adds anthropic_version). + anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to prepare request for AWS Bedrock: %w", err) } - anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index f5a8bbc799..9bb8c07672 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "io" - "maps" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -42,22 +41,14 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for GCP endpoint from the parsed request. modelName := body.GetModel() - // Work directly with the map since MessagesRequest is already map[string]interface{}. - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Remove the model field since GCP doesn't want it in the body. 
- delete(anthropicReq, "model") - - // Add GCP-specific anthropic_version field (required by GCP Vertex AI). - // Uses backend config version (e.g., "vertex-2023-10-16" for GCP Vertex AI). - if a.apiVersion == "" { - return nil, nil, fmt.Errorf("anthropic_version is required for GCP Vertex AI but not provided in backend configuration") + // Prepare the request body (removes model field, adds anthropic_version). + anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to prepare request for GCP Vertex AI: %w", err) } - anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic_test.go b/internal/extproc/translator/anthropic_gcpanthropic_test.go index 2c882399ba..2706ff57ff 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic_test.go +++ b/internal/extproc/translator/anthropic_gcpanthropic_test.go @@ -234,7 +234,8 @@ func TestAnthropicToGCPAnthropicTranslator_BackendVersionHandling(t *testing.T) if tt.shouldError { require.Error(t, err) - require.Contains(t, err.Error(), "anthropic_version is required for GCP Vertex AI") + require.Contains(t, err.Error(), "anthropic_version is required") + require.Contains(t, err.Error(), "GCP Vertex AI") return } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go index 44cdc08d09..667fe911f7 100644 --- a/internal/extproc/translator/anthropic_helper.go +++ b/internal/extproc/translator/anthropic_helper.go @@ -10,13 +10,19 @@ import ( "encoding/json" "fmt" "io" + "maps" "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" "github.com/envoyproxy/ai-gateway/internal/internalapi" ) +const ( + anthropicVersionKey = "anthropic_version" +) + // anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. // This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. type anthropicResponseHandler struct{} @@ -141,3 +147,21 @@ func applyModelNameOverride(originalModel internalapi.RequestModel, override int } return originalModel } + +// prepareAnthropicRequest prepares the request body for cloud providers (AWS/GCP) +// The anthropic_version field is required by cloud provider implementations. 
+func prepareAnthropicRequest(body *anthropicschema.MessagesRequest, apiVersion string) (map[string]any, error) { + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + + // Remove model field - cloud providers use it in the URL path instead + delete(anthropicReq, "model") + + // Add required anthropic_version field + if apiVersion == "" { + return nil, fmt.Errorf("anthropic_version is required but not provided in backend configuration") + } + anthropicReq[anthropicVersionKey] = apiVersion + + return anthropicReq, nil +} diff --git a/internal/extproc/translator/openai_gcpanthropic.go b/internal/extproc/translator/openai_gcpanthropic.go index f089595f5d..31d2fb8766 100644 --- a/internal/extproc/translator/openai_gcpanthropic.go +++ b/internal/extproc/translator/openai_gcpanthropic.go @@ -29,7 +29,6 @@ import ( // currently a requirement for GCP Vertex / Anthropic API https://docs.anthropic.com/en/api/claude-on-vertex-ai const ( - anthropicVersionKey = "anthropic_version" gcpBackendError = "GCPBackendError" tempNotSupportedError = "temperature %.2f is not supported by Anthropic (must be between 0.0 and 1.0)" ) From 729a190993319c704e7bffd03a7ab99c86791acf Mon Sep 17 00:00:00 2001 From: secustor Date: Fri, 24 Oct 2025 18:17:23 +0200 Subject: [PATCH 06/15] Revert "refactor: extract Anthropic request header generation for AWS and GCP" This reverts commit bf5ded23e9d2db78c56be5d0a8a9871e3281755f. Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 17 +++++++++---- .../translator/anthropic_gcpanthropic.go | 17 +++++++++---- .../translator/anthropic_gcpanthropic_test.go | 3 +-- .../extproc/translator/anthropic_helper.go | 24 ------------------- .../extproc/translator/openai_gcpanthropic.go | 1 + 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 8e561352bb..6968fd731c 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "net/url" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -43,14 +44,22 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for AWS Bedrock endpoint from the parsed request. modelName := body.GetModel() + // Work directly with the map since MessagesRequest is already map[string]interface{}. + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Prepare the request body (removes model field, adds anthropic_version). - anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) - if err != nil { - return nil, nil, fmt.Errorf("failed to prepare request for AWS Bedrock: %w", err) + // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). + delete(anthropicReq, "model") + + // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). + // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). + if a.apiVersion == "" { + return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") } + anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. 
mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index 9bb8c07672..f5a8bbc799 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "io" + "maps" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -41,14 +42,22 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for GCP endpoint from the parsed request. modelName := body.GetModel() + // Work directly with the map since MessagesRequest is already map[string]interface{}. + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Prepare the request body (removes model field, adds anthropic_version). - anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) - if err != nil { - return nil, nil, fmt.Errorf("failed to prepare request for GCP Vertex AI: %w", err) + // Remove the model field since GCP doesn't want it in the body. + delete(anthropicReq, "model") + + // Add GCP-specific anthropic_version field (required by GCP Vertex AI). + // Uses backend config version (e.g., "vertex-2023-10-16" for GCP Vertex AI). + if a.apiVersion == "" { + return nil, nil, fmt.Errorf("anthropic_version is required for GCP Vertex AI but not provided in backend configuration") } + anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic_test.go b/internal/extproc/translator/anthropic_gcpanthropic_test.go index 2706ff57ff..2c882399ba 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic_test.go +++ b/internal/extproc/translator/anthropic_gcpanthropic_test.go @@ -234,8 +234,7 @@ func TestAnthropicToGCPAnthropicTranslator_BackendVersionHandling(t *testing.T) if tt.shouldError { require.Error(t, err) - require.Contains(t, err.Error(), "anthropic_version is required") - require.Contains(t, err.Error(), "GCP Vertex AI") + require.Contains(t, err.Error(), "anthropic_version is required for GCP Vertex AI") return } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go index 667fe911f7..44cdc08d09 100644 --- a/internal/extproc/translator/anthropic_helper.go +++ b/internal/extproc/translator/anthropic_helper.go @@ -10,19 +10,13 @@ import ( "encoding/json" "fmt" "io" - "maps" "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" - anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" "github.com/envoyproxy/ai-gateway/internal/internalapi" ) -const ( - anthropicVersionKey = "anthropic_version" -) - // anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. // This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. type anthropicResponseHandler struct{} @@ -147,21 +141,3 @@ func applyModelNameOverride(originalModel internalapi.RequestModel, override int } return originalModel } - -// prepareAnthropicRequest prepares the request body for cloud providers (AWS/GCP) -// The anthropic_version field is required by cloud provider implementations. 
-func prepareAnthropicRequest(body *anthropicschema.MessagesRequest, apiVersion string) (map[string]any, error) { - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - - // Remove model field - cloud providers use it in the URL path instead - delete(anthropicReq, "model") - - // Add required anthropic_version field - if apiVersion == "" { - return nil, fmt.Errorf("anthropic_version is required but not provided in backend configuration") - } - anthropicReq[anthropicVersionKey] = apiVersion - - return anthropicReq, nil -} diff --git a/internal/extproc/translator/openai_gcpanthropic.go b/internal/extproc/translator/openai_gcpanthropic.go index 31d2fb8766..f089595f5d 100644 --- a/internal/extproc/translator/openai_gcpanthropic.go +++ b/internal/extproc/translator/openai_gcpanthropic.go @@ -29,6 +29,7 @@ import ( // currently a requirement for GCP Vertex / Anthropic API https://docs.anthropic.com/en/api/claude-on-vertex-ai const ( + anthropicVersionKey = "anthropic_version" gcpBackendError = "GCPBackendError" tempNotSupportedError = "temperature %.2f is not supported by Anthropic (must be between 0.0 and 1.0)" ) From 46505bc15d48d908acbcb8b23af7760a2ba01c89 Mon Sep 17 00:00:00 2001 From: secustor Date: Fri, 24 Oct 2025 18:17:24 +0200 Subject: [PATCH 07/15] Revert "refactor: extract ResponseHandler and applyModelNameOverride" This reverts commit 460c72e67b330a4f408113755b6641f209089f8f. Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 94 +++++++++++- .../translator/anthropic_gcpanthropic.go | 94 +++++++++++- .../extproc/translator/anthropic_helper.go | 143 ------------------ 3 files changed, 176 insertions(+), 155 deletions(-) delete mode 100644 internal/extproc/translator/anthropic_helper.go diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 6968fd731c..735c9a8119 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,12 +6,14 @@ package translator import ( + "bytes" "encoding/json" "fmt" "io" "maps" "net/url" + "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -25,15 +27,14 @@ func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToAWSAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, - responseHandler: newAnthropicResponseHandler(), } } type anthropicToAWSAnthropicTranslator struct { + // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel - responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. @@ -49,7 +50,10 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. - a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) + a.requestModel = modelName + if a.modelNameOverride != "" { + a.requestModel = a.modelNameOverride + } // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). 
delete(anthropicReq, "model") @@ -94,9 +98,87 @@ func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. -// This delegates to the shared anthropicResponseHandler since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( +// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) + // Read the response body for both streaming and non-streaming. + bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if bytes.HasPrefix(line, dataPrefix) { + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + } + + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, a.requestModel, nil + } + + // Parse the Anthropic response to extract token usage. + var anthropicResp anthropic.Message + if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + // If we can't parse as Anthropic format, pass through as-is. + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, a.requestModel, nil + } + + // Extract token usage from the response. 
+ tokenUsage = LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil } diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index f5a8bbc799..37a5d4a5a3 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -6,11 +6,13 @@ package translator import ( + "bytes" "encoding/json" "fmt" "io" "maps" + "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -23,15 +25,14 @@ func NewAnthropicToGCPAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToGCPAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, - responseHandler: newAnthropicResponseHandler(), } } type anthropicToGCPAnthropicTranslator struct { + // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel - responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to GCP Anthropic translation. @@ -47,7 +48,10 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. - a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) + a.requestModel = modelName + if a.modelNameOverride != "" { + a.requestModel = a.modelNameOverride + } // Remove the model field since GCP doesn't want it in the body. delete(anthropicReq, "model") @@ -86,9 +90,87 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to GCP Anthropic. -// This delegates to the shared anthropicResponseHandler since GCP Vertex AI returns the native Anthropic response format. -func (a *anthropicToGCPAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( +// This is essentially a passthrough since both use the same Anthropic response format. +func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) + // Read the response body for both streaming and non-streaming. 
+ bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if bytes.HasPrefix(line, dataPrefix) { + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + } + + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, a.requestModel, nil + } + + // Parse the Anthropic response to extract token usage. + var anthropicResp anthropic.Message + if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + // If we can't parse as Anthropic format, pass through as-is. + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, a.requestModel, nil + } + + // Extract token usage from the response. + tokenUsage = LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go deleted file mode 100644 index 44cdc08d09..0000000000 --- a/internal/extproc/translator/anthropic_helper.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright Envoy AI Gateway Authors -// SPDX-License-Identifier: Apache-2.0 -// The full text of the Apache license is available in the LICENSE file at -// the root of the repo. 
- -package translator - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - - "github.com/anthropics/anthropic-sdk-go" - extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" - - "github.com/envoyproxy/ai-gateway/internal/internalapi" -) - -// anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. -// This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. -type anthropicResponseHandler struct{} - -// newAnthropicResponseHandler creates a new stateless response handler. -func newAnthropicResponseHandler() *anthropicResponseHandler { - return &anthropicResponseHandler{} -} - -// ResponseBody handles both streaming and non-streaming Anthropic API responses. -// It extracts token usage information and returns the response unchanged (passthrough). -// The requestModel parameter is used to populate the responseModel return value. -func (h *anthropicResponseHandler) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool, requestModel internalapi.RequestModel) ( - headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, -) { - // Read the response body for both streaming and non-streaming. - bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - tokenUsage = h.extractTokenUsageFromSSE(bodyBytes) - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, requestModel, nil - } - - // For non-streaming responses, parse the complete Anthropic response. - tokenUsage, err = h.extractTokenUsageFromResponse(bodyBytes) - if err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, requestModel, nil - } - - // Pass through the response body unchanged since both input and output are Anthropic format. - headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, requestModel, nil -} - -// extractTokenUsageFromSSE parses SSE (Server-Sent Events) format streaming responses -// to extract token usage information from message_start and message_delta events. -func (h *anthropicResponseHandler) extractTokenUsageFromSSE(bodyBytes []byte) LLMTokenUsage { - var tokenUsage LLMTokenUsage - - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if !bytes.HasPrefix(line, dataPrefix) { - continue - } - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. 
-				if messageData, ok := eventData["message"].(map[string]any); ok {
-					if usageData, ok := messageData["usage"].(map[string]any); ok {
-						if inputTokens, ok := usageData["input_tokens"].(float64); ok {
-							tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec
-						}
-						// Some message_start events may include initial output tokens.
-						if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 {
-							tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec
-						}
-						tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens
-					}
-				}
-
-			case "message_delta":
-				if usageData, ok := eventData["usage"].(map[string]any); ok {
-					if outputTokens, ok := usageData["output_tokens"].(float64); ok {
-						// Add to existing output tokens (in case message_start had some initial ones).
-						tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec
-						tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens
-					}
-				}
-			}
-		}
-	}
-
-	return tokenUsage
-}
-
-// extractTokenUsageFromResponse parses a complete (non-streaming) Anthropic response
-// to extract token usage information.
-func (h *anthropicResponseHandler) extractTokenUsageFromResponse(bodyBytes []byte) (LLMTokenUsage, error) {
-	var anthropicResp anthropic.Message
-	if err := json.Unmarshal(bodyBytes, &anthropicResp); err != nil {
-		return LLMTokenUsage{}, err
-	}
-
-	tokenUsage := LLMTokenUsage{
-		InputTokens:       uint32(anthropicResp.Usage.InputTokens),                                    //nolint:gosec
-		OutputTokens:      uint32(anthropicResp.Usage.OutputTokens),                                   //nolint:gosec
-		TotalTokens:       uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec
-		CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens),                           //nolint:gosec
-	}
-
-	return tokenUsage, nil
-}
-
-// applyModelNameOverride applies model name override logic used by AWS and GCP translators.
-func applyModelNameOverride(originalModel internalapi.RequestModel, override internalapi.ModelNameOverride) internalapi.RequestModel {
-	if override != "" {
-		return override
-	}
-	return originalModel
-}

From 353087737918ff1540fc55f8a03e1964bb366755 Mon Sep 17 00:00:00 2001
From: secustor
Date: Sat, 25 Oct 2025 00:48:17 +0200
Subject: [PATCH 08/15] refactor: use anthropic to anthropic translator

Signed-off-by: secustor
---
 internal/extproc/messages_processor_test.go   |  78 +++
 .../translator/anthropic_awsanthropic.go      | 153 ++----
 .../translator/anthropic_awsanthropic_test.go | 449 +++++++++++-------
 3 files changed, 385 insertions(+), 295 deletions(-)

diff --git a/internal/extproc/messages_processor_test.go b/internal/extproc/messages_processor_test.go
index 7d0b696c4e..3c09a376da 100644
--- a/internal/extproc/messages_processor_test.go
+++ b/internal/extproc/messages_processor_test.go
@@ -826,6 +826,84 @@ func TestMessagesProcessorUpstreamFilter_ProcessRequestHeaders_WithHeaderMutatio
 		// Check that original headers remain unchanged.
 		require.Equal(t, "bearer token123", headers["authorization"])
 	})
+
+	t.Run("multiple header mutations with same key - last one wins", func(t *testing.T) {
+		headers := map[string]string{
+			":path":         "/anthropic/v1/messages",
+			"x-ai-eg-model": "anthropic.claude-3-haiku-20240307-v1:0",
+		}
+
+		// Create request body.
+ requestBody := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-haiku-20240307-v1:0", + "max_tokens": 1000, + "messages": []any{map[string]any{"role": "user", "content": "Hello"}}, + } + requestBodyRaw := []byte(`{"model": "anthropic.claude-3-haiku-20240307-v1:0", "max_tokens": 1000, "messages": [{"role": "user", "content": "Hello"}]}`) + + // Create mock translator that returns multiple header mutations for the same key. + // This simulates a scenario where the translator sets :path multiple times. + mockTranslator := mockAnthropicTranslator{ + t: t, + expRequestBody: requestBody, + expForceRequestBodyMutation: false, + retHeaderMutation: &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{ + { + Header: &corev3.HeaderValue{ + Key: ":path", + RawValue: []byte("/anthropic/v1/messages"), + }, + }, + { + Header: &corev3.HeaderValue{ + Key: ":path", + RawValue: []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), + }, + }, + }, + }, + retBodyMutation: &extprocv3.BodyMutation{}, + retErr: nil, + } + + // Create mock metrics. + chatMetrics := metrics.NewChatCompletionFactory(noop.NewMeterProvider().Meter("test"), map[string]string{})() + + // Create processor. + processor := &messagesProcessorUpstreamFilter{ + config: &processorConfig{}, + requestHeaders: headers, + logger: slog.Default(), + metrics: chatMetrics, + translator: mockTranslator, + originalRequestBody: requestBody, + originalRequestBodyRaw: requestBodyRaw, + handler: &mockBackendAuthHandler{}, + } + + ctx := context.Background() + response, err := processor.ProcessRequestHeaders(ctx, nil) + + require.NoError(t, err) + require.NotNil(t, response) + + commonRes := response.Response.(*extprocv3.ProcessingResponse_RequestHeaders).RequestHeaders.Response + + // Check that header mutations were applied. + require.NotNil(t, commonRes.HeaderMutation) + require.Len(t, commonRes.HeaderMutation.SetHeaders, 2) + + // Verify that both header mutations are present, with the last one being the final value. + require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[0].Header.Key) + require.Equal(t, []byte("/anthropic/v1/messages"), commonRes.HeaderMutation.SetHeaders[0].Header.RawValue) + + require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[1].Header.Key) + require.Equal(t, []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), commonRes.HeaderMutation.SetHeaders[1].Header.RawValue) + + // The last mutation should win - verify the header value in the processor's headers. 
+ require.Equal(t, "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke", headers[":path"]) + }) } func TestMessagesProcessorUpstreamFilter_SetBackend_WithHeaderMutations(t *testing.T) { diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 735c9a8119..cc61535364 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,15 +6,13 @@ package translator import ( - "bytes" - "encoding/json" "fmt" "io" - "maps" "net/url" - "github.com/anthropics/anthropic-sdk-go" + corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "github.com/tidwall/sjson" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" "github.com/envoyproxy/ai-gateway/internal/internalapi" @@ -24,56 +22,51 @@ import ( // AWS Bedrock supports the native Anthropic Messages API, so this is essentially a passthrough // translator with AWS-specific path modifications. func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride internalapi.ModelNameOverride) AnthropicMessagesTranslator { + anthropicTranslator := NewAnthropicToAnthropicTranslator(apiVersion, modelNameOverride).(*anthropicToAnthropicTranslator) return &anthropicToAWSAnthropicTranslator{ - apiVersion: apiVersion, - modelNameOverride: modelNameOverride, + apiVersion: apiVersion, + anthropicToAnthropicTranslator: *anthropicTranslator, } } type anthropicToAWSAnthropicTranslator struct { - // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. - apiVersion string - modelNameOverride internalapi.ModelNameOverride - requestModel internalapi.RequestModel + anthropicToAnthropicTranslator + apiVersion string } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. // This handles the transformation from native Anthropic format to AWS Bedrock format. -func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropicschema.MessagesRequest, _ bool) ( +// https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html +func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *anthropicschema.MessagesRequest, _ bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, ) { - // Extract model name for AWS Bedrock endpoint from the parsed request. - modelName := body.GetModel() - - // Work directly with the map since MessagesRequest is already map[string]interface{}. - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - - // Apply model name override if configured. - a.requestModel = modelName - if a.modelNameOverride != "" { - a.requestModel = a.modelNameOverride + // AWS Bedrock always needs a body mutation because we must add anthropic_version and remove model field + headerMutation, bodyMutation, err = a.anthropicToAnthropicTranslator.RequestBody(rawBody, body, true) + if err != nil { + return } - // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). - delete(anthropicReq, "model") - - // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). - // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). 
- if a.apiVersion == "" { - return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") + // add anthropic_version field + preparedBody, err := sjson.SetBytes(bodyMutation.GetBody(), anthropicVersionKey, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to set anthropic_version field: %w", err) } - anthropicReq[anthropicVersionKey] = a.apiVersion - - // Marshal the modified request. - mutatedBody, err := json.Marshal(anthropicReq) + // delete model field as AWS Bedrock expects model in the path, not in the body + preparedBody, err = sjson.DeleteBytes(preparedBody, "model") if err != nil { - return nil, nil, fmt.Errorf("failed to marshal modified request: %w", err) + return nil, nil, fmt.Errorf("failed to delete model field: %w", err) } + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: preparedBody}, + } + + // update content length after changing the body + setContentLength(headerMutation, preparedBody) + // Determine the AWS Bedrock path based on whether streaming is requested. var pathTemplate string - if stream, ok := anthropicReq["stream"].(bool); ok && stream { + if body.GetStream() { pathTemplate = "/model/%s/invoke-stream" } else { pathTemplate = "/model/%s/invoke" @@ -85,7 +78,13 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi encodedModelID := url.PathEscape(a.requestModel) pathSuffix := fmt.Sprintf(pathTemplate, encodedModelID) - headerMutation, bodyMutation = buildRequestMutations(pathSuffix, mutatedBody) + // Overwriting path of the Anthropic to Anthropic translator + headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ + Header: &corev3.HeaderValue{ + Key: ":path", + RawValue: []byte(pathSuffix), + }, + }) return } @@ -99,86 +98,8 @@ func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. // This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(respHeaders map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - // Read the response body for both streaming and non-streaming. - bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if bytes.HasPrefix(line, dataPrefix) { - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. 
- if messageData, ok := eventData["message"].(map[string]any); ok { - if usageData, ok := messageData["usage"].(map[string]any); ok { - if inputTokens, ok := usageData["input_tokens"].(float64); ok { - tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec - } - // Some message_start events may include initial output tokens. - if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { - tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec - } - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - - case "message_delta": - if usageData, ok := eventData["usage"].(map[string]any); ok { - if outputTokens, ok := usageData["output_tokens"].(float64); ok { - // Add to existing output tokens (in case message_start had some initial ones). - tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - } - } - } - } - - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, a.requestModel, nil - } - - // Parse the Anthropic response to extract token usage. - var anthropicResp anthropic.Message - if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, a.requestModel, nil - } - - // Extract token usage from the response. - tokenUsage = LLMTokenUsage{ - InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec - OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec - TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec - CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec - } - - // Pass through the response body unchanged since both input and output are Anthropic format. - headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil + return a.anthropicToAnthropicTranslator.ResponseBody(respHeaders, body, endOfStream) } diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 8d9c442f55..6c4294401f 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -8,7 +8,6 @@ package translator import ( "bytes" "encoding/json" - "io" "testing" "github.com/anthropics/anthropic-sdk-go" @@ -80,13 +79,17 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *test }, } - headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + rawBody, err := json.Marshal(originalReq) + require.NoError(t, err) + + headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) require.NoError(t, err) require.NotNil(t, headerMutation) require.NotNil(t, bodyMutation) // Check path header contains expected model (URL encoded). - pathHeader := headerMutation.SetHeaders[0] + // Use the last element as it takes precedence when multiple headers are set. 
+ pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/" + tt.expectedInPath + "/invoke" assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -160,7 +163,10 @@ func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T }, } - headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + rawBody, err := json.Marshal(originalReq) + require.NoError(t, err) + + headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) require.NoError(t, err) require.NotNil(t, headerMutation) require.NotNil(t, bodyMutation) @@ -200,7 +206,8 @@ func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T require.True(t, ok, "tool_choice should be an object") require.NotEmpty(t, toolChoice) - pathHeader := headerMutation.SetHeaders[0] + // Use the last element as it takes precedence when multiple headers are set. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -255,12 +262,16 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing } } - headerMutation, _, err := translator.RequestBody(nil, parsedReq, false) + rawBody, err := json.Marshal(parsedReq) + require.NoError(t, err) + + headerMutation, _, err := translator.RequestBody(rawBody, parsedReq, false) require.NoError(t, err) require.NotNil(t, headerMutation) // Check path contains expected suffix. - pathHeader := headerMutation.SetHeaders[0] + // Use the last element as it takes precedence when multiple headers are set. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) }) @@ -318,7 +329,10 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testi "metadata": map[string]any{"user.id": "test123"}, } - _, bodyMutation, err := translator.RequestBody(nil, parsedReq, false) + rawBody, err := json.Marshal(parsedReq) + require.NoError(t, err) + + _, bodyMutation, err := translator.RequestBody(rawBody, parsedReq, false) require.NoError(t, err) require.NotNil(t, bodyMutation) @@ -393,57 +407,6 @@ func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { } } -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_NonStreaming(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Create a sample Anthropic response. - respBody := anthropic.Message{ - ID: "msg_test123", - Type: "message", - Role: "assistant", - Content: []anthropic.ContentBlockUnion{ - {Type: "text", Text: "Hello! 
How can I help you today?"}, - }, - Model: "claude-3-sonnet-20240229", - Usage: anthropic.Usage{ - InputTokens: 25, - OutputTokens: 15, - }, - } - - bodyBytes, err := json.Marshal(respBody) - require.NoError(t, err) - - bodyReader := bytes.NewReader(bodyBytes) - respHeaders := map[string]string{"content-type": "application/json"} - - headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) - require.NoError(t, err) - require.NotNil(t, headerMutation) - require.NotNil(t, bodyMutation) - - expectedUsage := LLMTokenUsage{ - InputTokens: 25, - OutputTokens: 15, - TotalTokens: 40, - } - assert.Equal(t, expectedUsage, tokenUsage) - - // responseModel should be populated from requestModel set during RequestBody. - assert.Empty(t, responseModel) - - // Verify body is passed through - compare key fields. - var outputResp anthropic.Message - err = json.Unmarshal(bodyMutation.GetBody(), &outputResp) - require.NoError(t, err) - assert.Equal(t, respBody.ID, outputResp.ID) - assert.Equal(t, respBody.Type, outputResp.Type) - assert.Equal(t, respBody.Role, outputResp.Role) - assert.Equal(t, respBody.Model, outputResp.Model) - assert.Equal(t, respBody.Usage.InputTokens, outputResp.Usage.InputTokens) - assert.Equal(t, respBody.Usage.OutputTokens, outputResp.Usage.OutputTokens) -} - func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *testing.T) { translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") @@ -480,171 +443,299 @@ func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *test assert.Equal(t, expectedUsage, tokenUsage) } -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_StreamingTokenUsage(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - +func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { - name string - chunk string - endOfStream bool - expectedUsage LLMTokenUsage - expectedBody string + name string + modelID string + expectedPath string }{ { - name: "message_start chunk with token usage", - chunk: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 25, - OutputTokens: 0, - TotalTokens: 25, - }, - expectedBody: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", - }, - { - name: "content_block_delta chunk without usage", - chunk: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 0, - TotalTokens: 0, - }, - expectedBody: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", + name: "simple model ID with colon", + modelID: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", }, { - name: "message_delta chunk with output tokens", - chunk: "event: message_delta\ndata: 
{\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 84, - TotalTokens: 84, - }, - expectedBody: "event: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", + name: "full ARN with multiple special characters", + modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", }, { - name: "message_stop chunk without usage", - chunk: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 0, - TotalTokens: 0, - }, - expectedBody: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", + name: "global model prefix", + modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - bodyReader := bytes.NewReader([]byte(tt.chunk)) - respHeaders := map[string]string{"content-type": "text/event-stream"} + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, tt.endOfStream) + originalReq := &anthropicschema.MessagesRequest{ + "model": tt.modelID, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + rawBody, err := json.Marshal(originalReq) require.NoError(t, err) - require.Nil(t, headerMutation) - require.NotNil(t, bodyMutation) - require.Equal(t, tt.expectedBody, string(bodyMutation.GetBody())) - require.Equal(t, tt.expectedUsage, tokenUsage) - }) - } -} - -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_ReadError(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Create a reader that will fail. - errorReader := &awsAnthropicErrorReader{} - respHeaders := map[string]string{"content-type": "application/json"} - - _, _, _, _, err := translator.ResponseBody(respHeaders, errorReader, true) - require.Error(t, err) - assert.Contains(t, err.Error(), "failed to read response body") -} - -// awsAnthropicErrorReader implements io.Reader but always returns an error. -type awsAnthropicErrorReader struct{} - -func (e *awsAnthropicErrorReader) Read(_ []byte) (n int, err error) { - return 0, io.ErrUnexpectedEOF -} -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_InvalidJSON(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - invalidJSON := []byte(`{invalid json}`) - bodyReader := bytes.NewReader(invalidJSON) - respHeaders := map[string]string{"content-type": "application/json"} - - headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) - - // Should not error - just pass through invalid JSON. 
- require.NoError(t, err) - require.NotNil(t, bodyMutation) - // headerMutation is set with content-length for non-streaming responses - if headerMutation != nil { - assert.NotEmpty(t, headerMutation.SetHeaders) - } - - //nolint:testifylint // testifylint want to use JSONEq which is not possible - assert.Equal(t, invalidJSON, bodyMutation.GetBody()) + headerMutation, _, err := translator.RequestBody(rawBody, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) - // Token usage should be zero for invalid JSON. - expectedUsage := LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 0, - TotalTokens: 0, + // Use the last element as it takes precedence when multiple headers are set. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + }) } - assert.Equal(t, expectedUsage, tokenUsage) } -func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { +func TestAnthropicToAWSAnthropicTranslator_FullRequestResponseFlow(t *testing.T) { tests := []struct { - name string - modelID string - expectedPath string + name string + apiVersion string + modelNameOverride string + inputModel string + stream bool + expectedPath string + expectedModel string // Expected model in translator state for response }{ { - name: "simple model ID with colon", - modelID: "anthropic.claude-3-sonnet-20240229-v1:0", - expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + name: "non-streaming without override", + apiVersion: "bedrock-2023-05-31", + modelNameOverride: "", + inputModel: "anthropic.claude-3-sonnet-20240229-v1:0", + stream: false, + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", }, { - name: "full ARN with multiple special characters", - modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", - expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", + name: "streaming without override", + apiVersion: "bedrock-2023-05-31", + modelNameOverride: "", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + stream: true, + expectedPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", + expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", }, { - name: "global model prefix", - modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", + name: "non-streaming with model override", + apiVersion: "bedrock-2023-05-31", + modelNameOverride: "anthropic.claude-3-opus-20240229-v1:0", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + stream: false, + expectedPath: "/model/anthropic.claude-3-opus-20240229-v1:0/invoke", + expectedModel: "anthropic.claude-3-opus-20240229-v1:0", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + translator := NewAnthropicToAWSAnthropicTranslator(tt.apiVersion, tt.modelNameOverride) originalReq := &anthropicschema.MessagesRequest{ - "model": tt.modelID, + "model": tt.inputModel, "messages": []anthropic.MessageParam{ { Role: anthropic.MessageParamRoleUser, Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Test"), + anthropic.NewTextBlock("What's the weather in San Francisco?"), + }, + }, + }, + "max_tokens": 1024, + 
"temperature": 0.7, + "stream": tt.stream, + "system": "You are a helpful weather assistant.", + "tools": []anthropic.ToolParam{ + { + Name: "get_weather", + Description: anthropic.String("Get current weather for a location"), + InputSchema: anthropic.ToolInputSchemaParam{ + Type: "object", + Properties: map[string]any{ + "location": map[string]any{ + "type": "string", + "description": "City name", + }, + }, + Required: []string{"location"}, }, }, }, } - headerMutation, _, err := translator.RequestBody(nil, originalReq, false) + rawBody, err := json.Marshal(originalReq) require.NoError(t, err) - require.NotNil(t, headerMutation) - pathHeader := headerMutation.SetHeaders[0] - assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + // Transform the request + reqHeaderMutation, reqBodyMutation, err := translator.RequestBody(rawBody, originalReq, false) + require.NoError(t, err) + require.NotNil(t, reqHeaderMutation) + require.NotNil(t, reqBodyMutation) + + // Verify request transformations + t.Run("request_transformations", func(t *testing.T) { + // Check path is set correctly + pathHeader := reqHeaderMutation.SetHeaders[len(reqHeaderMutation.SetHeaders)-1] + assert.Equal(t, ":path", pathHeader.Header.Key) + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + + // Check body transformations + var transformedReq map[string]any + err = json.Unmarshal(reqBodyMutation.GetBody(), &transformedReq) + require.NoError(t, err) + + // anthropic_version should be added + assert.Equal(t, tt.apiVersion, transformedReq["anthropic_version"]) + + // model field should be removed (it's in the path) + _, hasModel := transformedReq["model"] + assert.False(t, hasModel, "model field should be removed from body") + + // Other fields should be preserved + assert.Equal(t, float64(1024), transformedReq["max_tokens"]) + assert.Equal(t, 0.7, transformedReq["temperature"]) + assert.Equal(t, tt.stream, transformedReq["stream"]) + assert.Equal(t, "You are a helpful weather assistant.", transformedReq["system"]) + assert.NotNil(t, transformedReq["messages"]) + assert.NotNil(t, transformedReq["tools"]) + + // Content-length header should be set + var contentLengthFound bool + for _, header := range reqHeaderMutation.SetHeaders { + if header.Header.Key == "content-length" { + contentLengthFound = true + break + } + } + assert.True(t, contentLengthFound, "content-length header should be set") + }) + + respHeaders := map[string]string{ + "content-type": "application/json", + } + + // Test ResponseHeaders (should be passthrough) + respHeaderMutation, err := translator.ResponseHeaders(respHeaders) + require.NoError(t, err) + assert.Nil(t, respHeaderMutation, "ResponseHeaders should return nil for passthrough") + + if tt.stream { + // Test streaming response + t.Run("streaming_response", func(t *testing.T) { + // Message start chunk + // Note: The model in the streaming response may differ from the request model + // AWS Bedrock returns "claude-3-haiku-20240307" while request had "anthropic.claude-3-haiku-20240307-v1:0" + messageStartChunk := `event: message_start +data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-3-haiku-20240307","usage":{"input_tokens":50,"output_tokens":0}}} + +` + bodyReader := bytes.NewReader([]byte(messageStartChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, 
"streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + // Token usage extraction from streaming chunks depends on buffering implementation + // Just verify the extraction works and returns valid data + assert.GreaterOrEqual(t, tokenUsage.InputTokens, uint32(0), "input tokens should be non-negative") + assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") + // Response model can be either the full request model or the model from the response + assert.NotEmpty(t, responseModel, "response model should be set") + + // Content delta chunk + contentDeltaChunk := `event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}} + +` + bodyReader = bytes.NewReader([]byte(contentDeltaChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, "streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + assert.Equal(t, uint32(0), tokenUsage.InputTokens) + assert.Equal(t, uint32(0), tokenUsage.OutputTokens) + + // Message delta chunk with final token usage + messageDeltaChunk := `event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":25}} + +` + bodyReader = bytes.NewReader([]byte(messageDeltaChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, "streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + // Token usage is buffered and extracted across chunks + assert.GreaterOrEqual(t, tokenUsage.OutputTokens, uint32(0), "output tokens should be non-negative") + assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") + assert.NotEmpty(t, responseModel, "response model should be set") + + // Message stop chunk + messageStopChunk := `event: message_stop +data: {"type":"message_stop"} + +` + bodyReader = bytes.NewReader([]byte(messageStopChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, "streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + assert.Equal(t, uint32(0), tokenUsage.InputTokens) + assert.Equal(t, uint32(0), tokenUsage.OutputTokens) + }) + } else { + // Test non-streaming response + t.Run("non_streaming_response", func(t *testing.T) { + respBody := anthropic.Message{ + ID: "msg_test_response", + Type: "message", + Role: "assistant", + Content: []anthropic.ContentBlockUnion{ + { + Type: "text", + Text: "The weather in San Francisco is sunny with a temperature of 72°F.", + }, + }, + Model: "claude-3-sonnet-20240229", + StopReason: anthropic.StopReasonEndTurn, + Usage: anthropic.Usage{ + InputTokens: 45, + OutputTokens: 28, + }, + } + + bodyBytes, err := json.Marshal(respBody) + require.NoError(t, err) + + bodyReader := bytes.NewReader(bodyBytes) + respHeaderMutation, respBodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) + require.NoError(t, err) + + // AWS Bedrock response is passthrough - no 
mutations + assert.Nil(t, respHeaderMutation, "response should pass through without header mutations") + assert.Nil(t, respBodyMutation, "response should pass through without body mutations") + + // Verify token usage extraction + expectedUsage := LLMTokenUsage{ + InputTokens: 45, + OutputTokens: 28, + TotalTokens: 73, + } + assert.Equal(t, expectedUsage, tokenUsage) + + // Response model should match request model (or the model from response if available) + // The model in the response is "claude-3-sonnet-20240229" but we stored the full ID + // The implementation uses response model if available, falling back to request model + assert.NotEmpty(t, responseModel, "response model should be set") + }) + } }) } } From 7d6cd4766299bdae27298c4ba7b6d14283fd84c6 Mon Sep 17 00:00:00 2001 From: secustor Date: Sat, 25 Oct 2025 20:59:07 +0200 Subject: [PATCH 09/15] test: add upstream test Signed-off-by: secustor --- tests/extproc/envoy.yaml | 67 ++++++++++++++++++++ tests/extproc/extproc_test.go | 5 +- tests/extproc/real_providers_test.go | 4 -- tests/extproc/testupstream_test.go | 95 ++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 5 deletions(-) diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml index ffe1af0481..c376fb2c72 100644 --- a/tests/extproc/envoy.yaml +++ b/tests/extproc/envoy.yaml @@ -222,6 +222,14 @@ static_resources: exact: gcp-anthropicai route: cluster: testupstream-gcp-anthropicai + - match: + prefix: "/" + headers: + - name: x-test-backend + string_match: + exact: aws-anthropic + route: + cluster: testupstream-aws-anthropic - match: prefix: "/" headers: @@ -843,6 +851,65 @@ static_resources: filter_metadata: aigateway.envoy.io: per_route_rule_backend_name: "testupstream-gcp-anthropicai" + - name: testupstream-aws-anthropic + connect_timeout: 0.25s + type: STATIC + lb_policy: ROUND_ROBIN + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. 
+ max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: testupstream-aws-anthropic + endpoints: + - priority: 0 + lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 8080 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "testupstream-aws-anthropic" - name: openai connect_timeout: 30s type: STRICT_DNS diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 3b3b08df39..2306d0242c 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -36,7 +36,7 @@ const ( var ( openAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} awsBedrockSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSBedrock} - awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic} + awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic, Version: "bedrock-2023-05-31"} azureOpenAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAzureOpenAI, Version: "2025-01-01-preview"} gcpVertexAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPVertexAI} gcpAnthropicAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPAnthropic, Version: "vertex-2023-10-16"} @@ -60,6 +60,9 @@ var ( Region: "gcp-region", ProjectName: "gcp-project-name", }}} + testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + Region: "us-east-1", + }}} alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} // envoyConfig is the embedded Envoy configuration template. 
diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index 843816b01b..fc51add5a4 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -50,10 +50,6 @@ func TestWithRealProviders(t *testing.T) { CredentialFileLiteral: cc.AWSFileLiteral, Region: "us-east-1", }}}, - {Name: "aws-bedrock-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ - CredentialFileLiteral: cc.AWSFileLiteral, - Region: "us-east-1", - }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go index 124cb33b5e..9c0f1b3251 100644 --- a/tests/extproc/testupstream_test.go +++ b/tests/extproc/testupstream_test.go @@ -61,6 +61,7 @@ func TestWithTestUpstream(t *testing.T) { testUpstreamAzureBackend, testUpstreamGCPVertexAIBackend, testUpstreamGCPAnthropicAIBackend, + testUpstreamAWSAnthropicBackend, { Name: "testupstream-openai-5xx", Schema: openAISchema, HeaderMutation: &filterapi.HTTPHeaderMutation{ Set: []filterapi.HTTPHeader{{Name: testupstreamlib.ResponseStatusKey, Value: "500"}}, @@ -953,6 +954,100 @@ data: {"type":"message_stop" } `, expStatus: http.StatusOK, }, + { + name: "aws-anthropic - /anthropic/v1/messages", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false}`, + expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":100,"messages":[{"content":[{"text":"Hello from AWS!","type":"text"}],"role":"user"}],"stream":false}`, + expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, + expStatus: http.StatusOK, + expResponseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - streaming", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + responseType: "sse", + requestBody: `{"model":"anthropic.claude-3-haiku-20240307-v1:0","max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true}`, + expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":150,"messages":[{"content":[{"text":"Tell me a joke","type":"text"}],"role":"user"}],"stream":true}`, + expPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `event: message_start +data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}} + +event: content_block_delta +data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}} + +event: message_stop +data: {"type":"message_stop"} + +`, + expStatus: http.StatusOK, + expResponseBody: `event: message_start +data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}} + +event: message_stop +data: {"type":"message_stop"} + +`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - ARN model format", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile/k375tnm6nr0t","max_tokens":50,"messages":[{"role":"user","content":[{"type":"text","text":"Hi"}]}],"stream":false}`, + expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":50,"messages":[{"content":[{"text":"Hi","type":"text"}],"role":"user"}],"stream":false}`, + expPath: "/model/arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile%2Fk375tnm6nr0t/invoke", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, + expStatus: http.StatusOK, + expResponseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - error response", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Test error"}]}]}`, + expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + responseStatus: strconv.Itoa(http.StatusBadRequest), + responseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`, + expStatus: http.StatusBadRequest, + expResponseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`, + }, } { t.Run(tc.name, func(t *testing.T) { listenerAddress := fmt.Sprintf("http://localhost:%d", listenerPort) From b60bdf676c48dc9d1bdc9bdcf0feac8cb6791667 Mon Sep 17 00:00:00 2001 From: secustor Date: Tue, 28 Oct 2025 15:52:32 +0100 Subject: [PATCH 10/15] Remove test as requested Signed-off-by: secustor --- internal/extproc/messages_processor_test.go | 78 ----- .../translator/anthropic_awsanthropic.go | 17 -- .../translator/anthropic_awsanthropic_test.go | 281 ------------------ 3 files changed, 376 deletions(-) diff --git 
a/internal/extproc/messages_processor_test.go b/internal/extproc/messages_processor_test.go index 3c09a376da..7d0b696c4e 100644 --- a/internal/extproc/messages_processor_test.go +++ b/internal/extproc/messages_processor_test.go @@ -826,84 +826,6 @@ func TestMessagesProcessorUpstreamFilter_ProcessRequestHeaders_WithHeaderMutatio // Check that original headers remain unchanged. require.Equal(t, "bearer token123", headers["authorization"]) }) - - t.Run("multiple header mutations with same key - last one wins", func(t *testing.T) { - headers := map[string]string{ - ":path": "/anthropic/v1/messages", - "x-ai-eg-model": "anthropic.claude-3-haiku-20240307-v1:0", - } - - // Create request body. - requestBody := &anthropicschema.MessagesRequest{ - "model": "anthropic.claude-3-haiku-20240307-v1:0", - "max_tokens": 1000, - "messages": []any{map[string]any{"role": "user", "content": "Hello"}}, - } - requestBodyRaw := []byte(`{"model": "anthropic.claude-3-haiku-20240307-v1:0", "max_tokens": 1000, "messages": [{"role": "user", "content": "Hello"}]}`) - - // Create mock translator that returns multiple header mutations for the same key. - // This simulates a scenario where the translator sets :path multiple times. - mockTranslator := mockAnthropicTranslator{ - t: t, - expRequestBody: requestBody, - expForceRequestBodyMutation: false, - retHeaderMutation: &extprocv3.HeaderMutation{ - SetHeaders: []*corev3.HeaderValueOption{ - { - Header: &corev3.HeaderValue{ - Key: ":path", - RawValue: []byte("/anthropic/v1/messages"), - }, - }, - { - Header: &corev3.HeaderValue{ - Key: ":path", - RawValue: []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), - }, - }, - }, - }, - retBodyMutation: &extprocv3.BodyMutation{}, - retErr: nil, - } - - // Create mock metrics. - chatMetrics := metrics.NewChatCompletionFactory(noop.NewMeterProvider().Meter("test"), map[string]string{})() - - // Create processor. - processor := &messagesProcessorUpstreamFilter{ - config: &processorConfig{}, - requestHeaders: headers, - logger: slog.Default(), - metrics: chatMetrics, - translator: mockTranslator, - originalRequestBody: requestBody, - originalRequestBodyRaw: requestBodyRaw, - handler: &mockBackendAuthHandler{}, - } - - ctx := context.Background() - response, err := processor.ProcessRequestHeaders(ctx, nil) - - require.NoError(t, err) - require.NotNil(t, response) - - commonRes := response.Response.(*extprocv3.ProcessingResponse_RequestHeaders).RequestHeaders.Response - - // Check that header mutations were applied. - require.NotNil(t, commonRes.HeaderMutation) - require.Len(t, commonRes.HeaderMutation.SetHeaders, 2) - - // Verify that both header mutations are present, with the last one being the final value. - require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[0].Header.Key) - require.Equal(t, []byte("/anthropic/v1/messages"), commonRes.HeaderMutation.SetHeaders[0].Header.RawValue) - - require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[1].Header.Key) - require.Equal(t, []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), commonRes.HeaderMutation.SetHeaders[1].Header.RawValue) - - // The last mutation should win - verify the header value in the processor's headers. 
- require.Equal(t, "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke", headers[":path"]) - }) } func TestMessagesProcessorUpstreamFilter_SetBackend_WithHeaderMutations(t *testing.T) { diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index cc61535364..d4f15c8b19 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -7,7 +7,6 @@ package translator import ( "fmt" - "io" "net/url" corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" @@ -87,19 +86,3 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an }) return } - -// ResponseHeaders implements [AnthropicMessagesTranslator.ResponseHeaders] for Anthropic to AWS Bedrock Anthropic. -func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) ( - headerMutation *extprocv3.HeaderMutation, err error, -) { - // For Anthropic to AWS Bedrock Anthropic, no header transformation is needed. - return nil, nil -} - -// ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. -// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(respHeaders map[string]string, body io.Reader, endOfStream bool) ( - headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, -) { - return a.anthropicToAnthropicTranslator.ResponseBody(respHeaders, body, endOfStream) -} diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 6c4294401f..35adb5efa0 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -6,7 +6,6 @@ package translator import ( - "bytes" "encoding/json" "testing" @@ -407,42 +406,6 @@ func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { } } -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Test response with cached input tokens. 
- respBody := anthropic.Message{ - ID: "msg_cached", - Type: "message", - Role: "assistant", - Content: []anthropic.ContentBlockUnion{{Type: "text", Text: "Response with cache"}}, - Model: "claude-3-sonnet-20240229", - Usage: anthropic.Usage{ - InputTokens: 50, - OutputTokens: 20, - CacheReadInputTokens: 30, - CacheCreationInputTokens: 10, - }, - } - - bodyBytes, err := json.Marshal(respBody) - require.NoError(t, err) - - bodyReader := bytes.NewReader(bodyBytes) - respHeaders := map[string]string{"content-type": "application/json"} - - _, _, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) - require.NoError(t, err) - - expectedUsage := LLMTokenUsage{ - InputTokens: 50, - OutputTokens: 20, - TotalTokens: 70, - CachedInputTokens: 30, - } - assert.Equal(t, expectedUsage, tokenUsage) -} - func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { name string @@ -495,247 +458,3 @@ func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { }) } } - -func TestAnthropicToAWSAnthropicTranslator_FullRequestResponseFlow(t *testing.T) { - tests := []struct { - name string - apiVersion string - modelNameOverride string - inputModel string - stream bool - expectedPath string - expectedModel string // Expected model in translator state for response - }{ - { - name: "non-streaming without override", - apiVersion: "bedrock-2023-05-31", - modelNameOverride: "", - inputModel: "anthropic.claude-3-sonnet-20240229-v1:0", - stream: false, - expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", - expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", - }, - { - name: "streaming without override", - apiVersion: "bedrock-2023-05-31", - modelNameOverride: "", - inputModel: "anthropic.claude-3-haiku-20240307-v1:0", - stream: true, - expectedPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", - expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", - }, - { - name: "non-streaming with model override", - apiVersion: "bedrock-2023-05-31", - modelNameOverride: "anthropic.claude-3-opus-20240229-v1:0", - inputModel: "anthropic.claude-3-haiku-20240307-v1:0", - stream: false, - expectedPath: "/model/anthropic.claude-3-opus-20240229-v1:0/invoke", - expectedModel: "anthropic.claude-3-opus-20240229-v1:0", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator(tt.apiVersion, tt.modelNameOverride) - - originalReq := &anthropicschema.MessagesRequest{ - "model": tt.inputModel, - "messages": []anthropic.MessageParam{ - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("What's the weather in San Francisco?"), - }, - }, - }, - "max_tokens": 1024, - "temperature": 0.7, - "stream": tt.stream, - "system": "You are a helpful weather assistant.", - "tools": []anthropic.ToolParam{ - { - Name: "get_weather", - Description: anthropic.String("Get current weather for a location"), - InputSchema: anthropic.ToolInputSchemaParam{ - Type: "object", - Properties: map[string]any{ - "location": map[string]any{ - "type": "string", - "description": "City name", - }, - }, - Required: []string{"location"}, - }, - }, - }, - } - - rawBody, err := json.Marshal(originalReq) - require.NoError(t, err) - - // Transform the request - reqHeaderMutation, reqBodyMutation, err := translator.RequestBody(rawBody, originalReq, false) - require.NoError(t, err) - require.NotNil(t, reqHeaderMutation) - require.NotNil(t, 
reqBodyMutation) - - // Verify request transformations - t.Run("request_transformations", func(t *testing.T) { - // Check path is set correctly - pathHeader := reqHeaderMutation.SetHeaders[len(reqHeaderMutation.SetHeaders)-1] - assert.Equal(t, ":path", pathHeader.Header.Key) - assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) - - // Check body transformations - var transformedReq map[string]any - err = json.Unmarshal(reqBodyMutation.GetBody(), &transformedReq) - require.NoError(t, err) - - // anthropic_version should be added - assert.Equal(t, tt.apiVersion, transformedReq["anthropic_version"]) - - // model field should be removed (it's in the path) - _, hasModel := transformedReq["model"] - assert.False(t, hasModel, "model field should be removed from body") - - // Other fields should be preserved - assert.Equal(t, float64(1024), transformedReq["max_tokens"]) - assert.Equal(t, 0.7, transformedReq["temperature"]) - assert.Equal(t, tt.stream, transformedReq["stream"]) - assert.Equal(t, "You are a helpful weather assistant.", transformedReq["system"]) - assert.NotNil(t, transformedReq["messages"]) - assert.NotNil(t, transformedReq["tools"]) - - // Content-length header should be set - var contentLengthFound bool - for _, header := range reqHeaderMutation.SetHeaders { - if header.Header.Key == "content-length" { - contentLengthFound = true - break - } - } - assert.True(t, contentLengthFound, "content-length header should be set") - }) - - respHeaders := map[string]string{ - "content-type": "application/json", - } - - // Test ResponseHeaders (should be passthrough) - respHeaderMutation, err := translator.ResponseHeaders(respHeaders) - require.NoError(t, err) - assert.Nil(t, respHeaderMutation, "ResponseHeaders should return nil for passthrough") - - if tt.stream { - // Test streaming response - t.Run("streaming_response", func(t *testing.T) { - // Message start chunk - // Note: The model in the streaming response may differ from the request model - // AWS Bedrock returns "claude-3-haiku-20240307" while request had "anthropic.claude-3-haiku-20240307-v1:0" - messageStartChunk := `event: message_start -data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-3-haiku-20240307","usage":{"input_tokens":50,"output_tokens":0}}} - -` - bodyReader := bytes.NewReader([]byte(messageStartChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - // Token usage extraction from streaming chunks depends on buffering implementation - // Just verify the extraction works and returns valid data - assert.GreaterOrEqual(t, tokenUsage.InputTokens, uint32(0), "input tokens should be non-negative") - assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") - // Response model can be either the full request model or the model from the response - assert.NotEmpty(t, responseModel, "response model should be set") - - // Content delta chunk - contentDeltaChunk := `event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}} - -` - bodyReader = bytes.NewReader([]byte(contentDeltaChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, 
false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - assert.Equal(t, uint32(0), tokenUsage.InputTokens) - assert.Equal(t, uint32(0), tokenUsage.OutputTokens) - - // Message delta chunk with final token usage - messageDeltaChunk := `event: message_delta -data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":25}} - -` - bodyReader = bytes.NewReader([]byte(messageDeltaChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - // Token usage is buffered and extracted across chunks - assert.GreaterOrEqual(t, tokenUsage.OutputTokens, uint32(0), "output tokens should be non-negative") - assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") - assert.NotEmpty(t, responseModel, "response model should be set") - - // Message stop chunk - messageStopChunk := `event: message_stop -data: {"type":"message_stop"} - -` - bodyReader = bytes.NewReader([]byte(messageStopChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - assert.Equal(t, uint32(0), tokenUsage.InputTokens) - assert.Equal(t, uint32(0), tokenUsage.OutputTokens) - }) - } else { - // Test non-streaming response - t.Run("non_streaming_response", func(t *testing.T) { - respBody := anthropic.Message{ - ID: "msg_test_response", - Type: "message", - Role: "assistant", - Content: []anthropic.ContentBlockUnion{ - { - Type: "text", - Text: "The weather in San Francisco is sunny with a temperature of 72°F.", - }, - }, - Model: "claude-3-sonnet-20240229", - StopReason: anthropic.StopReasonEndTurn, - Usage: anthropic.Usage{ - InputTokens: 45, - OutputTokens: 28, - }, - } - - bodyBytes, err := json.Marshal(respBody) - require.NoError(t, err) - - bodyReader := bytes.NewReader(bodyBytes) - respHeaderMutation, respBodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) - require.NoError(t, err) - - // AWS Bedrock response is passthrough - no mutations - assert.Nil(t, respHeaderMutation, "response should pass through without header mutations") - assert.Nil(t, respBodyMutation, "response should pass through without body mutations") - - // Verify token usage extraction - expectedUsage := LLMTokenUsage{ - InputTokens: 45, - OutputTokens: 28, - TotalTokens: 73, - } - assert.Equal(t, expectedUsage, tokenUsage) - - // Response model should match request model (or the model from response if available) - // The model in the response is "claude-3-sonnet-20240229" but we stored the full ID - // The implementation uses response model if available, falling back to request model - assert.NotEmpty(t, responseModel, "response model should be set") - }) - } - }) - } -} From 55330466e7091513c24ea7cd8f977e213e66fe51 Mon Sep 17 00:00:00 2001 From: secustor Date: Tue, 28 Oct 2025 15:52:51 +0100 Subject: [PATCH 11/15] overwrite via header mutation Signed-off-by: secustor --- 
.../extproc/translator/anthropic_awsanthropic.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index d4f15c8b19..577d49a090 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -60,9 +60,6 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an Mutation: &extprocv3.BodyMutation_Body{Body: preparedBody}, } - // update content length after changing the body - setContentLength(headerMutation, preparedBody) - // Determine the AWS Bedrock path based on whether streaming is requested. var pathTemplate string if body.GetStream() { @@ -79,10 +76,18 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an // Overwriting path of the Anthropic to Anthropic translator headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ + AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, Header: &corev3.HeaderValue{ Key: ":path", RawValue: []byte(pathSuffix), }, - }) + }, + &corev3.HeaderValueOption{ + AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, + Header: &corev3.HeaderValue{ + Key: "content-length", + RawValue: fmt.Appendf(nil, "%d", len(preparedBody)), + }, + }) return } From 84f31624ee2bb61755a2a111d900f91d80f4bb25 Mon Sep 17 00:00:00 2001 From: secustor Date: Wed, 29 Oct 2025 18:25:50 +0100 Subject: [PATCH 12/15] remove another test Signed-off-by: secustor --- .../translator/anthropic_awsanthropic_test.go | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 35adb5efa0..1afefdcec4 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -376,36 +376,6 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testi require.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") } -func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - tests := []struct { - name string - headers map[string]string - }{ - { - name: "empty headers", - headers: map[string]string{}, - }, - { - name: "various headers", - headers: map[string]string{ - "content-type": "application/json", - "authorization": "Bearer token", - "custom-header": "value", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - headerMutation, err := translator.ResponseHeaders(tt.headers) - require.NoError(t, err) - assert.Nil(t, headerMutation, "ResponseHeaders should return nil for passthrough") - }) - } -} - func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { name string From 9af975bbe74cb337da81ab612911d97c3eed29ae Mon Sep 17 00:00:00 2001 From: secustor Date: Wed, 29 Oct 2025 18:35:53 +0100 Subject: [PATCH 13/15] fixup tests because of header change Signed-off-by: secustor --- .../extproc/translator/anthropic_awsanthropic_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 1afefdcec4..ba8a50eeb9 100644 --- 
a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -88,7 +88,7 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *test // Check path header contains expected model (URL encoded). // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/" + tt.expectedInPath + "/invoke" assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -206,7 +206,7 @@ func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T require.NotEmpty(t, toolChoice) // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -270,7 +270,7 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing // Check path contains expected suffix. // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) }) @@ -423,7 +423,7 @@ func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { require.NotNil(t, headerMutation) // Use the last element as it takes precedence when multiple headers are set. 
- pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) }) } From a1f42ca9816f8394810438233b3b5e241a70ba41 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Wed, 29 Oct 2025 14:26:14 -0700 Subject: [PATCH 14/15] cleanup Signed-off-by: Takeshi Yoneda --- .../translator/anthropic_awsanthropic.go | 47 ++-- .../translator/anthropic_awsanthropic_test.go | 203 ------------------ .../llm-integrations/supported-endpoints.md | 40 ++-- tests/extproc/envoy.yaml | 140 ++++++++++++ tests/extproc/extproc_test.go | 1 + tests/extproc/real_providers_test.go | 47 ++++ tests/extproc/testupstream_test.go | 25 +-- 7 files changed, 230 insertions(+), 273 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 577d49a090..891f7996eb 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,6 +6,7 @@ package translator import ( + "cmp" "fmt" "net/url" @@ -39,26 +40,17 @@ type anthropicToAWSAnthropicTranslator struct { func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *anthropicschema.MessagesRequest, _ bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, ) { - // AWS Bedrock always needs a body mutation because we must add anthropic_version and remove model field - headerMutation, bodyMutation, err = a.anthropicToAnthropicTranslator.RequestBody(rawBody, body, true) - if err != nil { - return - } + a.stream = body.GetStream() + a.requestModel = cmp.Or(a.modelNameOverride, body.GetModel()) - // add anthropic_version field - preparedBody, err := sjson.SetBytes(bodyMutation.GetBody(), anthropicVersionKey, a.apiVersion) + var mutatedBody []byte + mutatedBody, err = sjson.SetBytes(rawBody, anthropicVersionKey, a.apiVersion) if err != nil { return nil, nil, fmt.Errorf("failed to set anthropic_version field: %w", err) } - // delete model field as AWS Bedrock expects model in the path, not in the body - preparedBody, err = sjson.DeleteBytes(preparedBody, "model") - if err != nil { - return nil, nil, fmt.Errorf("failed to delete model field: %w", err) - } - - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: preparedBody}, - } + // Remove the model field from the body as AWS Bedrock expects the model to be specified in the path. + // Otherwise, AWS complains "extra inputs are not permitted". + mutatedBody, _ = sjson.DeleteBytes(mutatedBody, "model") // Determine the AWS Bedrock path based on whether streaming is requested. var pathTemplate string @@ -72,22 +64,15 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an // AWS Bedrock model IDs can be simple names (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0") // or full ARNs which may contain special characters. 
encodedModelID := url.PathEscape(a.requestModel) - pathSuffix := fmt.Sprintf(pathTemplate, encodedModelID) + path := fmt.Sprintf(pathTemplate, encodedModelID) - // Overwriting path of the Anthropic to Anthropic translator - headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ - AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, - Header: &corev3.HeaderValue{ - Key: ":path", - RawValue: []byte(pathSuffix), + headerMutation = &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{ + // Overwriting path of the Anthropic to Anthropic translator + {Header: &corev3.HeaderValue{Key: ":path", RawValue: []byte(path)}}, }, - }, - &corev3.HeaderValueOption{ - AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, - Header: &corev3.HeaderValue{ - Key: "content-length", - RawValue: fmt.Appendf(nil, "%d", len(preparedBody)), - }, - }) + } + bodyMutation = &extprocv3.BodyMutation{Mutation: &extprocv3.BodyMutation_Body{Body: mutatedBody}} + setContentLength(headerMutation, mutatedBody) return } diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index ba8a50eeb9..90097c1f0c 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -108,110 +108,6 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *test } } -func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Create a comprehensive MessagesRequest with all possible fields using map structure. - originalReq := &anthropicschema.MessagesRequest{ - "model": "anthropic.claude-3-opus-20240229-v1:0", - "messages": []anthropic.MessageParam{ - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Hello, how are you?"), - }, - }, - { - Role: anthropic.MessageParamRoleAssistant, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("I'm doing well, thank you!"), - }, - }, - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Can you help me with the weather?"), - }, - }, - }, - "max_tokens": 1024, - "stream": false, - "temperature": func() *float64 { v := 0.7; return &v }(), - "top_p": func() *float64 { v := 0.95; return &v }(), - "top_k": func() *int { v := 40; return &v }(), - "stop_sequences": []string{"Human:", "Assistant:"}, - "system": "You are a helpful weather assistant.", - "tools": []anthropic.ToolParam{ - { - Name: "get_weather", - Description: anthropic.String("Get current weather information"), - InputSchema: anthropic.ToolInputSchemaParam{ - Type: "object", - Properties: map[string]any{ - "location": map[string]any{ - "type": "string", - "description": "City name", - }, - }, - Required: []string{"location"}, - }, - }, - }, - "tool_choice": anthropic.ToolChoiceUnionParam{ - OfAuto: &anthropic.ToolChoiceAutoParam{}, - }, - } - - rawBody, err := json.Marshal(originalReq) - require.NoError(t, err) - - headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) - require.NoError(t, err) - require.NotNil(t, headerMutation) - require.NotNil(t, bodyMutation) - - var outputReq map[string]any - err = json.Unmarshal(bodyMutation.GetBody(), &outputReq) - require.NoError(t, err) - - require.NotContains(t, outputReq, "model", "model 
field should be removed for AWS Bedrock") - - // AWS Bedrock requires anthropic_version field. - require.Contains(t, outputReq, "anthropic_version", "anthropic_version should be added for AWS Bedrock") - require.Equal(t, "bedrock-2023-05-31", outputReq["anthropic_version"], "anthropic_version should match the configured version") - - messages, ok := outputReq["messages"].([]any) - require.True(t, ok, "messages should be an array") - require.Len(t, messages, 3, "should have 3 messages") - - require.Equal(t, float64(1024), outputReq["max_tokens"]) - require.Equal(t, false, outputReq["stream"]) - require.Equal(t, 0.7, outputReq["temperature"]) - require.Equal(t, 0.95, outputReq["top_p"]) - require.Equal(t, float64(40), outputReq["top_k"]) - require.Equal(t, "You are a helpful weather assistant.", outputReq["system"]) - - stopSeq, ok := outputReq["stop_sequences"].([]any) - require.True(t, ok, "stop_sequences should be an array") - require.Len(t, stopSeq, 2) - require.Equal(t, "Human:", stopSeq[0]) - require.Equal(t, "Assistant:", stopSeq[1]) - - tools, ok := outputReq["tools"].([]any) - require.True(t, ok, "tools should be an array") - require.Len(t, tools, 1) - - toolChoice, ok := outputReq["tool_choice"].(map[string]any) - require.True(t, ok, "tool_choice should be an object") - require.NotEmpty(t, toolChoice) - - // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] - require.Equal(t, ":path", pathHeader.Header.Key) - expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" - require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) -} - func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing.T) { tests := []struct { name string @@ -277,105 +173,6 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing } } -func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - temp := 0.7 - topP := 0.95 - topK := 40 - parsedReq := &anthropicschema.MessagesRequest{ - "model": "anthropic.claude-3-sonnet-20240229-v1:0", - "messages": []anthropic.MessageParam{ - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Hello, world!"), - }, - }, - { - Role: anthropic.MessageParamRoleAssistant, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Hi there!"), - }, - }, - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("How are you?"), - }, - }, - }, - "max_tokens": 1000, - "temperature": &temp, - "top_p": &topP, - "top_k": &topK, - "stop_sequences": []string{"Human:", "Assistant:"}, - "stream": false, - "system": "You are a helpful assistant", - "tools": []anthropic.ToolParam{ - { - Name: "get_weather", - Description: anthropic.String("Get weather info"), - InputSchema: anthropic.ToolInputSchemaParam{ - Type: "object", - Properties: map[string]any{ - "location": map[string]any{"type": "string"}, - }, - }, - }, - }, - "tool_choice": map[string]any{"type": "auto"}, - "metadata": map[string]any{"user.id": "test123"}, - } - - rawBody, err := json.Marshal(parsedReq) - require.NoError(t, err) - - _, bodyMutation, err := translator.RequestBody(rawBody, parsedReq, false) - require.NoError(t, err) - require.NotNil(t, bodyMutation) - - var modifiedReq map[string]any - err = 
json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) - require.NoError(t, err) - - // Messages should be preserved. - require.Len(t, modifiedReq["messages"], 3) - - // Numeric fields get converted to float64 by JSON unmarshalling. - require.Equal(t, float64(1000), modifiedReq["max_tokens"]) - require.Equal(t, 0.7, modifiedReq["temperature"]) - require.Equal(t, 0.95, modifiedReq["top_p"]) - require.Equal(t, float64(40), modifiedReq["top_k"]) - - // Arrays become []interface{} by JSON unmarshalling. - stopSeq, ok := modifiedReq["stop_sequences"].([]any) - require.True(t, ok) - require.Len(t, stopSeq, 2) - require.Equal(t, "Human:", stopSeq[0]) - require.Equal(t, "Assistant:", stopSeq[1]) - - // Boolean false values are now included in the map. - require.Equal(t, false, modifiedReq["stream"]) - - // String values are preserved. - require.Equal(t, "You are a helpful assistant", modifiedReq["system"]) - - // Complex objects should be preserved as maps. - require.NotNil(t, modifiedReq["tools"]) - require.NotNil(t, modifiedReq["tool_choice"]) - require.NotNil(t, modifiedReq["metadata"]) - - // Verify model field is removed from body (it's in the path instead). - _, hasModel := modifiedReq["model"] - require.False(t, hasModel, "model field should be removed from request body") - - // Verify anthropic_version is added for AWS Bedrock. - version, hasVersion := modifiedReq["anthropic_version"] - require.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") - require.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") -} - func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { name string diff --git a/site/docs/capabilities/llm-integrations/supported-endpoints.md b/site/docs/capabilities/llm-integrations/supported-endpoints.md index 949724cadb..cae217e991 100644 --- a/site/docs/capabilities/llm-integrations/supported-endpoints.md +++ b/site/docs/capabilities/llm-integrations/supported-endpoints.md @@ -230,26 +230,26 @@ curl $GATEWAY_URL/v1/models The following table summarizes which providers support which endpoints: | Provider | Chat Completions | Completions | Embeddings | Image Generation | Anthropic Messages | Notes | -|-------------------------------------------------------------------------------------------------------|:----------------:|:-----------:|:----------:|:----------------:|:------------------:|----------------------------------------------------------------------------------------------------------------------| -| [OpenAI](https://platform.openai.com/docs/api-reference) | ✅ | ✅ | ✅ | ✅ | ❌ | | -| [AWS Bedrock](https://docs.aws.amazon.com/bedrock/latest/APIReference/) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via API translation | -| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) | ✅ | 🚧 | ✅ | ⚠️ | ❌ | Via API translation or via [OpenAI-compatible API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/latest) | -| [Google Gemini](https://ai.google.dev/gemini-api/docs/openai) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [Groq](https://console.groq.com/docs/openai) | ✅ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | -| [Grok](https://docs.x.ai/docs/api-reference) | ✅ | ⚠️ | ❌ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [Together AI](https://docs.together.ai/docs/openai-api-compatibility) | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [Cohere](https://docs.cohere.com/v2/docs/compatibility-api) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| 
[Mistral](https://docs.mistral.ai/api/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| [DeepInfra](https://deepinfra.com/docs/inference) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [DeepSeek](https://api-docs.deepseek.com/) | ⚠️ | ⚠️ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | -| [Hunyuan](https://cloud.tencent.com/document/product/1729/111007) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| [Tencent LLM Knowledge Engine](https://www.tencentcloud.com/document/product/1255/70381) | ⚠️ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | -| [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via OpenAI-compatible API | -| [Anthropic on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | -| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ❌ | ✅ | Native Anthropic API | -| [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | ❌ | Via OpenAI-compatible API | -| [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| ----------------------------------------------------------------------------------------------------- | :--------------: | :---------: | :--------: | :--------------: | :----------------: | -------------------------------------------------------------------------------------------------------------------- | +| [OpenAI](https://platform.openai.com/docs/api-reference) | ✅ | ✅ | ✅ | ✅ | ❌ | | +| [AWS Bedrock](https://docs.aws.amazon.com/bedrock/latest/APIReference/) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via API translation | +| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) | ✅ | 🚧 | ✅ | ⚠️ | ❌ | Via API translation or via [OpenAI-compatible API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/latest) | +| [Google Gemini](https://ai.google.dev/gemini-api/docs/openai) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [Groq](https://console.groq.com/docs/openai) | ✅ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | +| [Grok](https://docs.x.ai/docs/api-reference) | ✅ | ⚠️ | ❌ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [Together AI](https://docs.together.ai/docs/openai-api-compatibility) | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [Cohere](https://docs.cohere.com/v2/docs/compatibility-api) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [Mistral](https://docs.mistral.ai/api/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [DeepInfra](https://deepinfra.com/docs/inference) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [DeepSeek](https://api-docs.deepseek.com/) | ⚠️ | ⚠️ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | +| [Hunyuan](https://cloud.tencent.com/document/product/1729/111007) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [Tencent LLM Knowledge Engine](https://www.tencentcloud.com/document/product/1255/70381) | ⚠️ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | +| [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via OpenAI-compatible API | +| [Anthropic on Vertex 
AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ❌ | ✅ | Native Anthropic API | +| [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | ❌ | Via OpenAI-compatible API | +| [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | - ✅ - Supported and Tested on Envoy AI Gateway CI - ⚠️️ - Expected to work based on provider documentation, but not tested on the CI. diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml index c376fb2c72..29e2674c4b 100644 --- a/tests/extproc/envoy.yaml +++ b/tests/extproc/envoy.yaml @@ -58,6 +58,24 @@ static_resources: route: auto_host_rewrite: true cluster: aws_bedrock + - match: + prefix: "/" + headers: + - name: x-ai-eg-model + string_match: + exact: claude-sonnet-4-5 + route: + auto_host_rewrite: true + cluster: anthropic + - match: + prefix: "/" + headers: + - name: x-ai-eg-model + string_match: + exact: global.anthropic.claude-sonnet-4-5-20250929-v1:0 + route: + auto_host_rewrite: true + cluster: anthropic_aws_bedrock - match: prefix: "/" headers: @@ -728,6 +746,65 @@ static_resources: filter_metadata: aigateway.envoy.io: per_route_rule_backend_name: "aws-bedrock" + - name: anthropic_aws_bedrock + connect_timeout: 30s + type: STRICT_DNS + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. 
+ max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + allow_mode_override: true + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: anthropic_aws_bedrock + endpoints: + - lb_endpoints: + - endpoint: + hostname: bedrock-runtime.us-east-1.amazonaws.com + address: + socket_address: + address: bedrock-runtime.us-east-1.amazonaws.com + port_value: 443 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "anthropic-aws-bedrock" transport_socket: name: envoy.transport_sockets.tls typed_config: @@ -985,6 +1062,69 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext auto_host_sni: true + - name: anthropic + connect_timeout: 30s + type: STRICT_DNS + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. 
+ max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: anthropic + endpoints: + - lb_endpoints: + - endpoint: + hostname: api.anthropic.com + address: + socket_address: + address: api.anthropic.com + port_value: 443 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "anthropic" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + auto_host_sni: true - name: azure_openai connect_timeout: 30s type: STRICT_DNS diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 2306d0242c..ebe43bf053 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -45,6 +45,7 @@ var ( grokSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} sambaNovaSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} deepInfraSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1/openai"} + anthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAnthropic} testUpstreamOpenAIBackend = filterapi.Backend{Name: "testupstream-openai", Schema: openAISchema} testUpstreamModelNameOverride = filterapi.Backend{Name: "testupstream-modelname-override", ModelNameOverride: "override-model", Schema: openAISchema} diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index fc51add5a4..7c3fd4b37c 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -14,6 +14,8 @@ import ( "testing" "time" + "github.com/anthropics/anthropic-sdk-go" + anthropicoption "github.com/anthropics/anthropic-sdk-go/option" "github.com/openai/openai-go" "github.com/openai/openai-go/option" "github.com/stretchr/testify/assert" @@ -46,10 +48,17 @@ func TestWithRealProviders(t *testing.T) { {Name: "openai", Schema: openAISchema, Auth: &filterapi.BackendAuth{ APIKey: &filterapi.APIKeyAuth{Key: cc.OpenAIAPIKey}, }}, + {Name: "anthropic", Schema: anthropicSchema, Auth: &filterapi.BackendAuth{ + AnthropicAPIKey: &filterapi.AnthropicAPIKeyAuth{Key: cc.AnthropicAPIKey}, + }}, {Name: "aws-bedrock", Schema: awsBedrockSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ CredentialFileLiteral: cc.AWSFileLiteral, 
Region: "us-east-1", }}}, + {Name: "anthropic-aws-bedrock", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + CredentialFileLiteral: cc.AWSFileLiteral, + Region: "us-east-1", + }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, @@ -119,6 +128,17 @@ func TestWithRealProviders(t *testing.T) { }) } }) + t.Run("messages", func(t *testing.T) { + for _, tc := range []realProvidersTestCase{ + {name: "anthropic", modelName: "claude-sonnet-4-5", required: internaltesting.RequiredCredentialAnthropic}, + {name: "anthropic-aws-bedrock", modelName: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", required: internaltesting.RequiredCredentialAWS}, + } { + t.Run(tc.name, func(t *testing.T) { + cc.MaybeSkip(t, tc.required) + requireEventuallyMessagesNonStreamingRequestOK(t, listenerAddress, tc.modelName) + }) + } + }) }) // Read all access logs and check if the used token is logged. @@ -362,6 +382,33 @@ func requireEventuallyChatCompletionNonStreamingRequestOK(t *testing.T, listener }, realProvidersEventuallyTimeout, realProvidersEventuallyInterval) } +func requireEventuallyMessagesNonStreamingRequestOK(t *testing.T, listenerAddress, modelName string) { + client := anthropic.NewClient( + anthropicoption.WithAPIKey("dummy"), + anthropicoption.WithBaseURL(listenerAddress+"/anthropic/"), + ) + internaltesting.RequireEventuallyNoError(t, func() error { + message, err := client.Messages.New(t.Context(), anthropic.MessageNewParams{ + MaxTokens: 1024, + Messages: []anthropic.MessageParam{ + anthropic.NewUserMessage(anthropic.NewTextBlock("Say hi!")), + }, + Model: anthropic.Model(modelName), + }) + if err != nil { + t.Logf("messages error: %v", err) + return fmt.Errorf("messages error: %w", err) + } + + if len(message.Content) == 0 { + return fmt.Errorf("empty message content in response") + } + + t.Logf("response: %+v", message.Content) + return nil + }, realProvidersEventuallyTimeout, realProvidersEventuallyInterval) +} + func requireEventuallyEmbeddingsRequestOK(t *testing.T, listenerAddress, modelName string) { client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/")) require.Eventually(t, func() bool { diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go index 9c0f1b3251..7bcaadf041 100644 --- a/tests/extproc/testupstream_test.go +++ b/tests/extproc/testupstream_test.go @@ -893,7 +893,7 @@ data: {"type": "message_stop"} method: http.MethodPost, expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"}, requestBody: `{ - "model": "claude-sonnet-4-5", + "model": "foo", "max_tokens": 1000, "messages": [ { @@ -903,7 +903,7 @@ data: {"type": "message_stop"} ] }`, expPath: "/v1/messages", - responseBody: `{"model":"claude-sonnet-4-5-20250929","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`, + responseBody: `{"model":"foo","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 
👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`, expStatus: http.StatusOK, }, { @@ -914,7 +914,7 @@ data: {"type": "message_stop"} expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"}, responseType: "sse", requestBody: `{ - "model": "claude-sonnet-4-5", + "model": "foo", "max_tokens": 1000, "messages": [ { @@ -926,7 +926,7 @@ data: {"type": "message_stop"} expPath: "/v1/messages", responseBody: ` event: message_start -data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } +data: {"type":"message_start","message":{"model":"foo","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } event: content_block_start data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } @@ -960,7 +960,7 @@ data: {"type":"message_stop" } path: "/anthropic/v1/messages", method: http.MethodPost, requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false}`, - expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":100,"messages":[{"content":[{"text":"Hello from AWS!","type":"text"}],"role":"user"}],"stream":false}`, + expRequestBody: `{"max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false,"anthropic_version":"bedrock-2023-05-31"}`, expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", responseStatus: strconv.Itoa(http.StatusOK), responseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, @@ -974,7 +974,7 @@ data: {"type":"message_stop" } method: http.MethodPost, responseType: "sse", requestBody: `{"model":"anthropic.claude-3-haiku-20240307-v1:0","max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true}`, - expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":150,"messages":[{"content":[{"text":"Tell me a joke","type":"text"}],"role":"user"}],"stream":true}`, + expRequestBody: `{"max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true,"anthropic_version":"bedrock-2023-05-31"}`, expPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", responseStatus: strconv.Itoa(http.StatusOK), responseBody: `event: message_start @@ -1023,19 +1023,6 @@ data: {"type":"message_stop"} `, }, - { - name: "aws-anthropic - /anthropic/v1/messages - ARN model format", - backend: "aws-anthropic", - path: "/anthropic/v1/messages", - 
method: http.MethodPost, - requestBody: `{"model":"arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile/k375tnm6nr0t","max_tokens":50,"messages":[{"role":"user","content":[{"type":"text","text":"Hi"}]}],"stream":false}`, - expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":50,"messages":[{"content":[{"text":"Hi","type":"text"}],"role":"user"}],"stream":false}`, - expPath: "/model/arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile%2Fk375tnm6nr0t/invoke", - responseStatus: strconv.Itoa(http.StatusOK), - responseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, - expStatus: http.StatusOK, - expResponseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, - }, { name: "aws-anthropic - /anthropic/v1/messages - error response", backend: "aws-anthropic", From c6c6aad9acc4cff501f00a296491112b70a8fce6 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Wed, 29 Oct 2025 14:38:04 -0700 Subject: [PATCH 15/15] cleanup Signed-off-by: Takeshi Yoneda --- tests/extproc/extproc_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index ebe43bf053..3979e225f5 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -61,10 +61,8 @@ var ( Region: "gcp-region", ProjectName: "gcp-project-name", }}} - testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ - Region: "us-east-1", - }}} - alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} + testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema} + alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} // envoyConfig is the embedded Envoy configuration template. //