From 82f752fcd62f94d5ddeae753225af0465e40fa62 Mon Sep 17 00:00:00 2001 From: secustor Date: Tue, 21 Oct 2025 00:28:28 +0200 Subject: [PATCH 01/15] feat: add support for anthropic api to anthropic on aws bedrock Signed-off-by: secustor --- api/v1alpha1/shared_types.go | 8 +- examples/basic/aws-bedrock-anthropic.yaml | 93 +++ internal/extproc/messages_processor.go | 5 +- .../translator/anthropic_awsanthropic.go | 184 +++++ .../translator/anthropic_awsanthropic_test.go | 650 ++++++++++++++++++ internal/filterapi/filterconfig.go | 3 + ...teway.envoyproxy.io_aiservicebackends.yaml | 1 + ...teway.envoyproxy.io_aiservicebackends.yaml | 1 + site/docs/api/api.mdx | 5 + .../llm-integrations/supported-endpoints.md | 2 + .../aws-bedrock-anthropic.md | 344 +++++++++ .../connect-providers/index.md | 1 + tests/extproc/extproc_test.go | 1 + tests/extproc/real_providers_test.go | 4 + 14 files changed, 1300 insertions(+), 2 deletions(-) create mode 100644 examples/basic/aws-bedrock-anthropic.yaml create mode 100644 internal/extproc/translator/anthropic_awsanthropic.go create mode 100644 internal/extproc/translator/anthropic_awsanthropic_test.go create mode 100644 site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md diff --git a/api/v1alpha1/shared_types.go b/api/v1alpha1/shared_types.go index 596a6a56e2..98e97942d1 100644 --- a/api/v1alpha1/shared_types.go +++ b/api/v1alpha1/shared_types.go @@ -15,7 +15,7 @@ package v1alpha1 type VersionedAPISchema struct { // Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend. // - // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic + // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic Name APISchema `json:"name"` // Version is the version of the API schema. @@ -65,6 +65,12 @@ const ( // APISchemaAnthropic is the native Anthropic API schema. // https://docs.claude.com/en/home APISchemaAnthropic APISchema = "Anthropic" + // APISchemaAWSAnthropic is the schema for Anthropic models hosted on AWS Bedrock. + // Uses the native Anthropic Messages API format for requests and responses. + // + // https://aws.amazon.com/bedrock/anthropic/ + // https://docs.claude.com/en/api/claude-on-amazon-bedrock + APISchemaAWSAnthropic APISchema = "AWSAnthropic" ) const ( diff --git a/examples/basic/aws-bedrock-anthropic.yaml b/examples/basic/aws-bedrock-anthropic.yaml new file mode 100644 index 0000000000..b2db5df483 --- /dev/null +++ b/examples/basic/aws-bedrock-anthropic.yaml @@ -0,0 +1,93 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
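+#
+# Overview (illustrative) of the resources defined below, which expose Claude on AWS Bedrock
+# through the native Anthropic Messages API:
+#   - AIGatewayRoute: routes requests whose x-ai-eg-model header matches the Claude model ID
+#   - AIServiceBackend: declares the AWSAnthropic schema (version bedrock-2023-05-31)
+#   - BackendSecurityPolicy: signs requests with AWS credentials for us-east-1
+#   - Backend + BackendTLSPolicy: target bedrock-runtime.us-east-1.amazonaws.com over TLS
+#   - Secret: holds the AWS credentials file referenced by the policy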
+ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIServiceBackend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + schema: + name: AWSAnthropic + version: bedrock-2023-05-31 + backendRef: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + kind: Backend + group: gateway.envoyproxy.io +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: BackendSecurityPolicy +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials + namespace: default +spec: + targetRefs: + - group: aigateway.envoyproxy.io + kind: AIServiceBackend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + type: AWSCredentials + awsCredentials: + region: us-east-1 + credentialsFile: + secretRef: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: Backend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + endpoints: + - fqdn: + hostname: bedrock-runtime.us-east-1.amazonaws.com + port: 443 +--- +apiVersion: gateway.networking.k8s.io/v1alpha3 +kind: BackendTLSPolicy +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-tls + namespace: default +spec: + targetRefs: + - group: "gateway.envoyproxy.io" + kind: Backend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + validation: + wellKnownCACertificates: "System" + hostname: bedrock-runtime.us-east-1.amazonaws.com +--- +apiVersion: v1 +kind: Secret +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials + namespace: default +type: Opaque +stringData: + # Replace this with your AWS credentials. + # You can also use AWS IAM roles for service accounts (IRSA) in EKS. + credentials: | + [default] + aws_access_key_id = AWS_ACCESS_KEY_ID + aws_secret_access_key = AWS_SECRET_ACCESS_KEY diff --git a/internal/extproc/messages_processor.go b/internal/extproc/messages_processor.go index 9a5ea3eb72..f6b48ec54f 100644 --- a/internal/extproc/messages_processor.go +++ b/internal/extproc/messages_processor.go @@ -157,10 +157,13 @@ func (c *messagesProcessorUpstreamFilter) selectTranslator(out filterapi.Version // Anthropic → GCP Anthropic (request direction translator). // Uses backend config version (GCP Vertex AI requires specific versions like "vertex-2023-10-16"). c.translator = translator.NewAnthropicToGCPAnthropicTranslator(out.Version, c.modelNameOverride) + case filterapi.APISchemaAWSAnthropic: + // Anthropic → AWS Bedrock Anthropic (request direction translator). + c.translator = translator.NewAnthropicToAWSAnthropicTranslator(out.Version, c.modelNameOverride) case filterapi.APISchemaAnthropic: c.translator = translator.NewAnthropicToAnthropicTranslator(out.Version, c.modelNameOverride) default: - return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (GCPAnthropic). Backend %s uses different model format", out.Name) + return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (Anthropic, GCPAnthropic, AWSAnthropic). 
Backend %s uses different model format", out.Name) } return nil } diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go new file mode 100644 index 0000000000..735c9a8119 --- /dev/null +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -0,0 +1,184 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "maps" + "net/url" + + "github.com/anthropics/anthropic-sdk-go" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" + "github.com/envoyproxy/ai-gateway/internal/internalapi" +) + +// NewAnthropicToAWSAnthropicTranslator creates a translator for Anthropic to AWS Bedrock Anthropic format. +// AWS Bedrock supports the native Anthropic Messages API, so this is essentially a passthrough +// translator with AWS-specific path modifications. +func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride internalapi.ModelNameOverride) AnthropicMessagesTranslator { + return &anthropicToAWSAnthropicTranslator{ + apiVersion: apiVersion, + modelNameOverride: modelNameOverride, + } +} + +type anthropicToAWSAnthropicTranslator struct { + // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. + apiVersion string + modelNameOverride internalapi.ModelNameOverride + requestModel internalapi.RequestModel +} + +// RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. +// This handles the transformation from native Anthropic format to AWS Bedrock format. +func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropicschema.MessagesRequest, _ bool) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, +) { + // Extract model name for AWS Bedrock endpoint from the parsed request. + modelName := body.GetModel() + + // Work directly with the map since MessagesRequest is already map[string]interface{}. + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + + // Apply model name override if configured. + a.requestModel = modelName + if a.modelNameOverride != "" { + a.requestModel = a.modelNameOverride + } + + // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). + delete(anthropicReq, "model") + + // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). + // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). + if a.apiVersion == "" { + return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") + } + anthropicReq[anthropicVersionKey] = a.apiVersion + + // Marshal the modified request. + mutatedBody, err := json.Marshal(anthropicReq) + if err != nil { + return nil, nil, fmt.Errorf("failed to marshal modified request: %w", err) + } + + // Determine the AWS Bedrock path based on whether streaming is requested. + var pathTemplate string + if stream, ok := anthropicReq["stream"].(bool); ok && stream { + pathTemplate = "/model/%s/invoke-stream" + } else { + pathTemplate = "/model/%s/invoke" + } + + // URL encode the model ID for the path to handle ARNs with special characters. 
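+ // (Illustrative, mirroring the unit tests added in this patch: url.PathEscape leaves colons intact,
+ // so "anthropic.claude-3-sonnet-20240229-v1:0" yields "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+ // while the "/" in an application-inference-profile ARN is escaped to "%2F".)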
+ // AWS Bedrock model IDs can be simple names (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0") + // or full ARNs which may contain special characters. + encodedModelID := url.PathEscape(a.requestModel) + pathSuffix := fmt.Sprintf(pathTemplate, encodedModelID) + + headerMutation, bodyMutation = buildRequestMutations(pathSuffix, mutatedBody) + return +} + +// ResponseHeaders implements [AnthropicMessagesTranslator.ResponseHeaders] for Anthropic to AWS Bedrock Anthropic. +func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) ( + headerMutation *extprocv3.HeaderMutation, err error, +) { + // For Anthropic to AWS Bedrock Anthropic, no header transformation is needed. + return nil, nil +} + +// ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. +// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, +) { + // Read the response body for both streaming and non-streaming. + bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if bytes.HasPrefix(line, dataPrefix) { + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + } + + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, a.requestModel, nil + } + + // Parse the Anthropic response to extract token usage. + var anthropicResp anthropic.Message + if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + // If we can't parse as Anthropic format, pass through as-is. 
+ return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, a.requestModel, nil + } + + // Extract token usage from the response. + tokenUsage = LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil +} diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go new file mode 100644 index 0000000000..8d9c442f55 --- /dev/null +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -0,0 +1,650 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "bytes" + "encoding/json" + "io" + "testing" + + "github.com/anthropics/anthropic-sdk-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" +) + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *testing.T) { + tests := []struct { + name string + override string + inputModel string + expectedModel string + expectedInPath string + }{ + { + name: "no override uses original model", + override: "", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedInPath: "anthropic.claude-3-haiku-20240307-v1:0", + }, + { + name: "override replaces model in body and path", + override: "anthropic.claude-3-sonnet-20240229-v1:0", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedInPath: "anthropic.claude-3-sonnet-20240229-v1:0", + }, + { + name: "override with empty input model", + override: "anthropic.claude-3-opus-20240229-v1:0", + inputModel: "", + expectedModel: "anthropic.claude-3-opus-20240229-v1:0", + expectedInPath: "anthropic.claude-3-opus-20240229-v1:0", + }, + { + name: "model with ARN format", + override: "", + inputModel: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa", + expectedModel: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa", + expectedInPath: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile%2Faaaaaaaaa", + }, + { + name: "global model ID", + override: "", + inputModel: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedModel: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedInPath: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", tt.override) + + // Create the request using map structure. 
+ originalReq := &anthropicschema.MessagesRequest{ + "model": tt.inputModel, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello"), + }, + }, + }, + } + + headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + // Check path header contains expected model (URL encoded). + pathHeader := headerMutation.SetHeaders[0] + require.Equal(t, ":path", pathHeader.Header.Key) + expectedPath := "/model/" + tt.expectedInPath + "/invoke" + assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) + + // Check that model field is removed from body (since it's in the path). + var modifiedReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) + require.NoError(t, err) + _, hasModel := modifiedReq["model"] + assert.False(t, hasModel, "model field should be removed from request body") + + // Verify anthropic_version field is added (required by AWS Bedrock). + version, hasVersion := modifiedReq["anthropic_version"] + assert.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") + assert.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Create a comprehensive MessagesRequest with all possible fields using map structure. + originalReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-opus-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello, how are you?"), + }, + }, + { + Role: anthropic.MessageParamRoleAssistant, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("I'm doing well, thank you!"), + }, + }, + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Can you help me with the weather?"), + }, + }, + }, + "max_tokens": 1024, + "stream": false, + "temperature": func() *float64 { v := 0.7; return &v }(), + "top_p": func() *float64 { v := 0.95; return &v }(), + "top_k": func() *int { v := 40; return &v }(), + "stop_sequences": []string{"Human:", "Assistant:"}, + "system": "You are a helpful weather assistant.", + "tools": []anthropic.ToolParam{ + { + Name: "get_weather", + Description: anthropic.String("Get current weather information"), + InputSchema: anthropic.ToolInputSchemaParam{ + Type: "object", + Properties: map[string]any{ + "location": map[string]any{ + "type": "string", + "description": "City name", + }, + }, + Required: []string{"location"}, + }, + }, + }, + "tool_choice": anthropic.ToolChoiceUnionParam{ + OfAuto: &anthropic.ToolChoiceAutoParam{}, + }, + } + + headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + var outputReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &outputReq) + require.NoError(t, err) + + require.NotContains(t, outputReq, "model", "model field should be removed for AWS Bedrock") + + // AWS Bedrock requires anthropic_version field. 
+ require.Contains(t, outputReq, "anthropic_version", "anthropic_version should be added for AWS Bedrock") + require.Equal(t, "bedrock-2023-05-31", outputReq["anthropic_version"], "anthropic_version should match the configured version") + + messages, ok := outputReq["messages"].([]any) + require.True(t, ok, "messages should be an array") + require.Len(t, messages, 3, "should have 3 messages") + + require.Equal(t, float64(1024), outputReq["max_tokens"]) + require.Equal(t, false, outputReq["stream"]) + require.Equal(t, 0.7, outputReq["temperature"]) + require.Equal(t, 0.95, outputReq["top_p"]) + require.Equal(t, float64(40), outputReq["top_k"]) + require.Equal(t, "You are a helpful weather assistant.", outputReq["system"]) + + stopSeq, ok := outputReq["stop_sequences"].([]any) + require.True(t, ok, "stop_sequences should be an array") + require.Len(t, stopSeq, 2) + require.Equal(t, "Human:", stopSeq[0]) + require.Equal(t, "Assistant:", stopSeq[1]) + + tools, ok := outputReq["tools"].([]any) + require.True(t, ok, "tools should be an array") + require.Len(t, tools, 1) + + toolChoice, ok := outputReq["tool_choice"].(map[string]any) + require.True(t, ok, "tool_choice should be an object") + require.NotEmpty(t, toolChoice) + + pathHeader := headerMutation.SetHeaders[0] + require.Equal(t, ":path", pathHeader.Header.Key) + expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" + require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) +} + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing.T) { + tests := []struct { + name string + stream any + expectedPathSuffix string + }{ + { + name: "non-streaming uses /invoke", + stream: false, + expectedPathSuffix: "/invoke", + }, + { + name: "streaming uses /invoke-stream", + stream: true, + expectedPathSuffix: "/invoke-stream", + }, + { + name: "missing stream defaults to /invoke", + stream: nil, + expectedPathSuffix: "/invoke", + }, + { + name: "non-boolean stream defaults to /invoke", + stream: "true", + expectedPathSuffix: "/invoke", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + parsedReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + if tt.stream != nil { + if streamVal, ok := tt.stream.(bool); ok { + (*parsedReq)["stream"] = streamVal + } + } + + headerMutation, _, err := translator.RequestBody(nil, parsedReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + + // Check path contains expected suffix. 
+ pathHeader := headerMutation.SetHeaders[0] + expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix + assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + temp := 0.7 + topP := 0.95 + topK := 40 + parsedReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello, world!"), + }, + }, + { + Role: anthropic.MessageParamRoleAssistant, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hi there!"), + }, + }, + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("How are you?"), + }, + }, + }, + "max_tokens": 1000, + "temperature": &temp, + "top_p": &topP, + "top_k": &topK, + "stop_sequences": []string{"Human:", "Assistant:"}, + "stream": false, + "system": "You are a helpful assistant", + "tools": []anthropic.ToolParam{ + { + Name: "get_weather", + Description: anthropic.String("Get weather info"), + InputSchema: anthropic.ToolInputSchemaParam{ + Type: "object", + Properties: map[string]any{ + "location": map[string]any{"type": "string"}, + }, + }, + }, + }, + "tool_choice": map[string]any{"type": "auto"}, + "metadata": map[string]any{"user.id": "test123"}, + } + + _, bodyMutation, err := translator.RequestBody(nil, parsedReq, false) + require.NoError(t, err) + require.NotNil(t, bodyMutation) + + var modifiedReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) + require.NoError(t, err) + + // Messages should be preserved. + require.Len(t, modifiedReq["messages"], 3) + + // Numeric fields get converted to float64 by JSON unmarshalling. + require.Equal(t, float64(1000), modifiedReq["max_tokens"]) + require.Equal(t, 0.7, modifiedReq["temperature"]) + require.Equal(t, 0.95, modifiedReq["top_p"]) + require.Equal(t, float64(40), modifiedReq["top_k"]) + + // Arrays become []interface{} by JSON unmarshalling. + stopSeq, ok := modifiedReq["stop_sequences"].([]any) + require.True(t, ok) + require.Len(t, stopSeq, 2) + require.Equal(t, "Human:", stopSeq[0]) + require.Equal(t, "Assistant:", stopSeq[1]) + + // Boolean false values are now included in the map. + require.Equal(t, false, modifiedReq["stream"]) + + // String values are preserved. + require.Equal(t, "You are a helpful assistant", modifiedReq["system"]) + + // Complex objects should be preserved as maps. + require.NotNil(t, modifiedReq["tools"]) + require.NotNil(t, modifiedReq["tool_choice"]) + require.NotNil(t, modifiedReq["metadata"]) + + // Verify model field is removed from body (it's in the path instead). + _, hasModel := modifiedReq["model"] + require.False(t, hasModel, "model field should be removed from request body") + + // Verify anthropic_version is added for AWS Bedrock. 
+ version, hasVersion := modifiedReq["anthropic_version"] + require.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") + require.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + tests := []struct { + name string + headers map[string]string + }{ + { + name: "empty headers", + headers: map[string]string{}, + }, + { + name: "various headers", + headers: map[string]string{ + "content-type": "application/json", + "authorization": "Bearer token", + "custom-header": "value", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + headerMutation, err := translator.ResponseHeaders(tt.headers) + require.NoError(t, err) + assert.Nil(t, headerMutation, "ResponseHeaders should return nil for passthrough") + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_NonStreaming(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Create a sample Anthropic response. + respBody := anthropic.Message{ + ID: "msg_test123", + Type: "message", + Role: "assistant", + Content: []anthropic.ContentBlockUnion{ + {Type: "text", Text: "Hello! How can I help you today?"}, + }, + Model: "claude-3-sonnet-20240229", + Usage: anthropic.Usage{ + InputTokens: 25, + OutputTokens: 15, + }, + } + + bodyBytes, err := json.Marshal(respBody) + require.NoError(t, err) + + bodyReader := bytes.NewReader(bodyBytes) + respHeaders := map[string]string{"content-type": "application/json"} + + headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + expectedUsage := LLMTokenUsage{ + InputTokens: 25, + OutputTokens: 15, + TotalTokens: 40, + } + assert.Equal(t, expectedUsage, tokenUsage) + + // responseModel should be populated from requestModel set during RequestBody. + assert.Empty(t, responseModel) + + // Verify body is passed through - compare key fields. + var outputResp anthropic.Message + err = json.Unmarshal(bodyMutation.GetBody(), &outputResp) + require.NoError(t, err) + assert.Equal(t, respBody.ID, outputResp.ID) + assert.Equal(t, respBody.Type, outputResp.Type) + assert.Equal(t, respBody.Role, outputResp.Role) + assert.Equal(t, respBody.Model, outputResp.Model) + assert.Equal(t, respBody.Usage.InputTokens, outputResp.Usage.InputTokens) + assert.Equal(t, respBody.Usage.OutputTokens, outputResp.Usage.OutputTokens) +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Test response with cached input tokens. 
+ respBody := anthropic.Message{ + ID: "msg_cached", + Type: "message", + Role: "assistant", + Content: []anthropic.ContentBlockUnion{{Type: "text", Text: "Response with cache"}}, + Model: "claude-3-sonnet-20240229", + Usage: anthropic.Usage{ + InputTokens: 50, + OutputTokens: 20, + CacheReadInputTokens: 30, + CacheCreationInputTokens: 10, + }, + } + + bodyBytes, err := json.Marshal(respBody) + require.NoError(t, err) + + bodyReader := bytes.NewReader(bodyBytes) + respHeaders := map[string]string{"content-type": "application/json"} + + _, _, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) + require.NoError(t, err) + + expectedUsage := LLMTokenUsage{ + InputTokens: 50, + OutputTokens: 20, + TotalTokens: 70, + CachedInputTokens: 30, + } + assert.Equal(t, expectedUsage, tokenUsage) +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_StreamingTokenUsage(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + tests := []struct { + name string + chunk string + endOfStream bool + expectedUsage LLMTokenUsage + expectedBody string + }{ + { + name: "message_start chunk with token usage", + chunk: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 25, + OutputTokens: 0, + TotalTokens: 25, + }, + expectedBody: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", + }, + { + name: "content_block_delta chunk without usage", + chunk: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 0, + TotalTokens: 0, + }, + expectedBody: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", + }, + { + name: "message_delta chunk with output tokens", + chunk: "event: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 84, + TotalTokens: 84, + }, + expectedBody: "event: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", + }, + { + name: "message_stop chunk without usage", + chunk: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", + endOfStream: false, + expectedUsage: LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 0, + TotalTokens: 0, + }, + expectedBody: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + bodyReader := bytes.NewReader([]byte(tt.chunk)) + respHeaders := map[string]string{"content-type": "text/event-stream"} + + headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, tt.endOfStream) + + require.NoError(t, err) + require.Nil(t, headerMutation) + require.NotNil(t, bodyMutation) 
+ require.Equal(t, tt.expectedBody, string(bodyMutation.GetBody())) + require.Equal(t, tt.expectedUsage, tokenUsage) + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_ReadError(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + // Create a reader that will fail. + errorReader := &awsAnthropicErrorReader{} + respHeaders := map[string]string{"content-type": "application/json"} + + _, _, _, _, err := translator.ResponseBody(respHeaders, errorReader, true) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to read response body") +} + +// awsAnthropicErrorReader implements io.Reader but always returns an error. +type awsAnthropicErrorReader struct{} + +func (e *awsAnthropicErrorReader) Read(_ []byte) (n int, err error) { + return 0, io.ErrUnexpectedEOF +} + +func TestAnthropicToAWSAnthropicTranslator_ResponseBody_InvalidJSON(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + invalidJSON := []byte(`{invalid json}`) + bodyReader := bytes.NewReader(invalidJSON) + respHeaders := map[string]string{"content-type": "application/json"} + + headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) + + // Should not error - just pass through invalid JSON. + require.NoError(t, err) + require.NotNil(t, bodyMutation) + // headerMutation is set with content-length for non-streaming responses + if headerMutation != nil { + assert.NotEmpty(t, headerMutation.SetHeaders) + } + + //nolint:testifylint // testifylint want to use JSONEq which is not possible + assert.Equal(t, invalidJSON, bodyMutation.GetBody()) + + // Token usage should be zero for invalid JSON. + expectedUsage := LLMTokenUsage{ + InputTokens: 0, + OutputTokens: 0, + TotalTokens: 0, + } + assert.Equal(t, expectedUsage, tokenUsage) +} + +func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { + tests := []struct { + name string + modelID string + expectedPath string + }{ + { + name: "simple model ID with colon", + modelID: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + }, + { + name: "full ARN with multiple special characters", + modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", + }, + { + name: "global model prefix", + modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + originalReq := &anthropicschema.MessagesRequest{ + "model": tt.modelID, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + + headerMutation, _, err := translator.RequestBody(nil, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + + pathHeader := headerMutation.SetHeaders[0] + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + }) + } +} diff --git a/internal/filterapi/filterconfig.go b/internal/filterapi/filterconfig.go index b9eb226216..80b013a08b 100644 --- a/internal/filterapi/filterconfig.go +++ 
b/internal/filterapi/filterconfig.go @@ -117,6 +117,9 @@ const ( APISchemaGCPAnthropic APISchemaName = "GCPAnthropic" // APISchemaAnthropic represents the standard Anthropic API schema. APISchemaAnthropic APISchemaName = "Anthropic" + // APISchemaAWSAnthropic represents the AWS Bedrock Anthropic API schema. + // Used for Claude models hosted on AWS Bedrock using the native Anthropic Messages API. + APISchemaAWSAnthropic APISchemaName = "AWSAnthropic" ) // RouteRuleName is the name of the route rule. diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml index 6c2cf79190..f46b75d026 100644 --- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml +++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml @@ -235,6 +235,7 @@ spec: - GCPVertexAI - GCPAnthropic - Anthropic + - AWSAnthropic type: string version: description: |- diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml index 6c2cf79190..f46b75d026 100644 --- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aiservicebackends.yaml @@ -235,6 +235,7 @@ spec: - GCPVertexAI - GCPAnthropic - Anthropic + - AWSAnthropic type: string version: description: |- diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx index 6a11fe730e..ead38a39b9 100644 --- a/site/docs/api/api.mdx +++ b/site/docs/api/api.mdx @@ -757,6 +757,11 @@ APISchema defines the API schema. type="enum" required="false" description="APISchemaAnthropic is the native Anthropic API schema.
https://docs.claude.com/en/home
" +/> #### AWSCredentialsFile diff --git a/site/docs/capabilities/llm-integrations/supported-endpoints.md b/site/docs/capabilities/llm-integrations/supported-endpoints.md index fa1c4ac8ad..31deb9a0a0 100644 --- a/site/docs/capabilities/llm-integrations/supported-endpoints.md +++ b/site/docs/capabilities/llm-integrations/supported-endpoints.md @@ -80,6 +80,7 @@ curl -H "Content-Type: application/json" \ - Anthropic - GCP Anthropic +- AWS Anthropic **Example:** @@ -212,6 +213,7 @@ The following table summarizes which providers support which endpoints: | [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | Via OpenAI-compatible API | | [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | Via OpenAI-compatible API | | [Anthropic on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ✅ | Native Anthropic API | | [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | Via OpenAI-compatible API | | [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | diff --git a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md new file mode 100644 index 0000000000..2a63dd7b27 --- /dev/null +++ b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md @@ -0,0 +1,344 @@ +--- +id: aws-bedrock-anthropic +title: Connect AWS Bedrock (Anthropic Native API) +sidebar_position: 4 +--- + +# Connect AWS Bedrock with Anthropic Native API + +This guide shows you how to configure Envoy AI Gateway to use Anthropic models on AWS Bedrock with the **native Anthropic Messages API format**. This allows you to use the `/anthropic/v1/messages` endpoint to call Claude models hosted on AWS Bedrock. + +> [!NOTE] +> If you want to use AWS Bedrock models with the OpenAI-compatible format (`/v1/chat/completions`), see the [AWS Bedrock guide](./aws-bedrock.md) instead. + +## Prerequisites + +Before you begin, you'll need: + +- AWS credentials with access to Bedrock +- Basic setup completed from the [Basic Usage](../basic-usage.md) guide +- Basic configuration removed as described in the [Advanced Configuration](./index.md) overview +- Model access enabled for Anthropic Claude models in your AWS region + +## AWS Credentials Setup + +Ensure you have: + +1. An AWS account with Bedrock access enabled +2. AWS credentials with permissions to: + - `bedrock:InvokeModel` + - `bedrock:ListFoundationModels` +3. Your AWS access key ID and secret access key +4. Enabled model access to Anthropic Claude models in your desired AWS region (e.g., `us-east-1`) + - Go to the AWS Bedrock console and request access to Anthropic models + - If you want to use a different AWS region, you must update all instances of `us-east-1` with the desired region in the configuration file + +> [!TIP] +> Consider using AWS IAM roles and limited-scope credentials for production environments. For EKS clusters, AWS IAM Roles for Service Accounts (IRSA) is recommended. + +## Why Use the Native Anthropic API? 
+ +The native Anthropic API provides several advantages when working with Claude models: + +- **Full feature support**: Access all Anthropic-specific features like extended thinking, prompt caching, and tool use +- **Consistent API**: Use the same API format you would with Anthropic's direct API +- **Better compatibility**: Avoid potential translation issues between OpenAI and Anthropic formats +- **Feature parity**: Get immediate access to new Anthropic features as they're released + +## Configuration Steps + +> [!IMPORTANT] +> Ensure you have followed the prerequisite steps in [Connect Providers](../connect-providers/) before proceeding. + +### 1. Download Configuration Template + +```shell +curl -O https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/aws-bedrock-anthropic.yaml +``` + +### 2. Configure AWS Credentials + +Edit the `aws-bedrock-anthropic.yaml` file to replace these placeholder values: + +- `AWS_ACCESS_KEY_ID`: Your AWS access key ID +- `AWS_SECRET_ACCESS_KEY`: Your AWS secret access key +- Update the `region` field if you're using a region other than `us-east-1` +- Update the model ID in the `value` field if you want to use a different Claude model + +> [!CAUTION] +> Make sure to keep your AWS credentials secure and never commit them to version control. The credentials will be stored in Kubernetes secrets. + +### 3. Apply Configuration + +Apply the updated configuration and wait for the Gateway pod to be ready. If you already have a Gateway running, the secret credential update will be picked up automatically in a few seconds. + +```shell +kubectl apply -f aws-bedrock-anthropic.yaml + +kubectl wait pods --timeout=2m \ + -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ + -n envoy-gateway-system \ + --for=condition=Ready +``` + +### 4. Test the Configuration + +You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. See the [Basic Usage](../basic-usage.md) page for instructions. + +Test your configuration using the native Anthropic Messages API format: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 100 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +Expected output: + +```json +{ + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "The capital of France is Paris." + } + ], + "model": "claude-3-5-sonnet-20241022", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 13, + "output_tokens": 8 + } +} +``` + +### 5. Test Streaming + +The native Anthropic API also supports streaming responses: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 5." + } + ], + "max_tokens": 100, + "stream": true + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Available Anthropic Models on AWS Bedrock + +AWS Bedrock supports several Claude model versions. 
Here are some commonly used model IDs: + +| Model Name | AWS Bedrock Model ID | +| ---------------------------- | ----------------------------------------- | +| Claude 3.5 Sonnet (Oct 2024) | anthropic.claude-3-5-sonnet-20241022-v2:0 | +| Claude 3.5 Sonnet (Jun 2024) | anthropic.claude-3-5-sonnet-20240620-v1:0 | +| Claude 3 Opus | anthropic.claude-3-opus-20240229-v1:0 | +| Claude 3 Sonnet | anthropic.claude-3-sonnet-20240229-v1:0 | +| Claude 3 Haiku | anthropic.claude-3-haiku-20240307-v1:0 | + +> [!NOTE] +> Model availability varies by AWS region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for the complete list of supported models in your region. + +## Configuring More Models + +To use additional models, add more `AIGatewayRoute` rules to the configuration file. Each rule should specify a different model ID: + +```yaml +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + # Claude 3.5 Sonnet (Oct 2024) + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic + # Claude 3 Opus + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-opus-20240229-v1:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +``` + +## Advanced Features + +### Using Anthropic-Specific Features + +Since this configuration uses the native Anthropic API, you have full access to Anthropic-specific features: + +#### Extended Thinking + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" + } + ], + "max_tokens": 1000, + "thinking": { + "type": "enabled", + "budget_tokens": 5000 + } + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +#### Prompt Caching + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "system": [ + { + "type": "text", + "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", + "cache_control": {"type": "ephemeral"} + } + ], + "messages": [ + { + "role": "user", + "content": "Write a function to calculate fibonacci numbers." + } + ], + "max_tokens": 500 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +#### Tool Use (Function Calling) + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + } + ], + "max_tokens": 500, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } + ] + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Troubleshooting + +If you encounter issues: + +1. 
**Verify your AWS credentials are correct and active** + + ```shell + # Check if credentials are properly configured + kubectl get secret envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials -n default -o yaml + ``` + +2. **Check pod status** + + ```shell + kubectl get pods -n envoy-gateway-system + ``` + +3. **View controller logs** + + ```shell + kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller + ``` + +4. **View gateway pod logs** + + ```shell + kubectl logs -n envoy-gateway-system -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic + ``` + +### Common Errors + +| Error Code | Issue | Solution | +| ---------- | ----------------------------------------------- | -------------------------------------------------------------------- | +| 401/403 | Invalid credentials or insufficient permissions | Verify AWS credentials and ensure Bedrock permissions are granted | +| 404 | Model not found or not available in region | Check model ID and ensure model access is enabled in your AWS region | +| 429 | Rate limit exceeded | Implement rate limiting or request quota increase from AWS | +| 400 | Invalid request format | Verify request body matches Anthropic API format | +| 500 | AWS Bedrock internal error | Check AWS Bedrock service status and retry after a short delay | + +## Security Considerations + +When deploying in production: + +1. **Use IAM Roles for Service Accounts (IRSA)** in EKS instead of static credentials +2. **Implement request rate limiting** to control costs and prevent abuse +3. **Enable audit logging** to track API usage and detect anomalies +4. **Use least-privilege IAM policies** that only grant necessary permissions +5. **Rotate credentials regularly** if using static access keys +6. **Monitor token usage and costs** using the gateway's metrics + +## What's Next + +Now that you've connected AWS Bedrock with the native Anthropic API, explore these capabilities: + +- **[Usage-Based Rate Limiting](../../capabilities/traffic/usage-based-ratelimiting.md)** - Configure token-based rate limiting and cost controls +- **[Provider Fallback](../../capabilities/traffic/provider-fallback.md)** - Set up automatic failover between AWS Bedrock and other Anthropic providers +- **[Metrics and Monitoring](../../capabilities/observability/metrics.md)** - Monitor usage, costs, and performance metrics +- **[Model Virtualization](../../capabilities/traffic/model-virtualization.md)** - Create virtual model names that route to different backends + +## References + +- [AWS Bedrock Anthropic Models Documentation](https://aws.amazon.com/bedrock/anthropic/) +- [Anthropic API Reference](https://docs.anthropic.com/en/api) +- [AWS Bedrock Model IDs](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) +- [AIGatewayRoute API Reference](../../api/api.mdx#aigatewayrouterule) diff --git a/site/docs/getting-started/connect-providers/index.md b/site/docs/getting-started/connect-providers/index.md index 2137c9ad63..fb23dc4abd 100644 --- a/site/docs/getting-started/connect-providers/index.md +++ b/site/docs/getting-started/connect-providers/index.md @@ -44,3 +44,4 @@ Choose your provider to get started: - [Connect OpenAI](./openai.md) - [Connect AWS Bedrock](./aws-bedrock.md) - [Connect Azure OpenAI](./azure-openai.md) +- [Connect GCP VertexAI](./gcp-vertexai.md) diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 3a7b9442fe..64c0ae228e 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -36,6 
+36,7 @@ const ( var ( openAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} awsBedrockSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSBedrock} + awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic} azureOpenAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAzureOpenAI, Version: "2025-01-01-preview"} gcpVertexAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPVertexAI} gcpAnthropicAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPAnthropic, Version: "vertex-2023-10-16"} diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index fc51add5a4..843816b01b 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -50,6 +50,10 @@ func TestWithRealProviders(t *testing.T) { CredentialFileLiteral: cc.AWSFileLiteral, Region: "us-east-1", }}}, + {Name: "aws-bedrock-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + CredentialFileLiteral: cc.AWSFileLiteral, + Region: "us-east-1", + }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, From 70183bcde0b7651b569dccb27cbddf004efa2ad8 Mon Sep 17 00:00:00 2001 From: Sebastian Poxhofer Date: Thu, 23 Oct 2025 17:38:47 +0200 Subject: [PATCH 02/15] Remove some doc blocks Signed-off-by: Sebastian Poxhofer --- .../aws-bedrock-anthropic.md | 41 +------------------ 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md index 2a63dd7b27..cd5e031304 100644 --- a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md +++ b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md @@ -28,6 +28,7 @@ Ensure you have: 2. AWS credentials with permissions to: - `bedrock:InvokeModel` - `bedrock:ListFoundationModels` + - `aws-marketplace:ViewSubscriptions` 3. Your AWS access key ID and secret access key 4. Enabled model access to Anthropic Claude models in your desired AWS region (e.g., `us-east-1`) - Go to the AWS Bedrock console and request access to Anthropic models @@ -144,20 +145,6 @@ curl -H "Content-Type: application/json" \ $GATEWAY_URL/anthropic/v1/messages ``` -## Available Anthropic Models on AWS Bedrock - -AWS Bedrock supports several Claude model versions. Here are some commonly used model IDs: - -| Model Name | AWS Bedrock Model ID | -| ---------------------------- | ----------------------------------------- | -| Claude 3.5 Sonnet (Oct 2024) | anthropic.claude-3-5-sonnet-20241022-v2:0 | -| Claude 3.5 Sonnet (Jun 2024) | anthropic.claude-3-5-sonnet-20240620-v1:0 | -| Claude 3 Opus | anthropic.claude-3-opus-20240229-v1:0 | -| Claude 3 Sonnet | anthropic.claude-3-sonnet-20240229-v1:0 | -| Claude 3 Haiku | anthropic.claude-3-haiku-20240307-v1:0 | - -> [!NOTE] -> Model availability varies by AWS region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for the complete list of supported models in your region. 
## Configuring More Models @@ -316,29 +303,3 @@ If you encounter issues: | 400 | Invalid request format | Verify request body matches Anthropic API format | | 500 | AWS Bedrock internal error | Check AWS Bedrock service status and retry after a short delay | -## Security Considerations - -When deploying in production: - -1. **Use IAM Roles for Service Accounts (IRSA)** in EKS instead of static credentials -2. **Implement request rate limiting** to control costs and prevent abuse -3. **Enable audit logging** to track API usage and detect anomalies -4. **Use least-privilege IAM policies** that only grant necessary permissions -5. **Rotate credentials regularly** if using static access keys -6. **Monitor token usage and costs** using the gateway's metrics - -## What's Next - -Now that you've connected AWS Bedrock with the native Anthropic API, explore these capabilities: - -- **[Usage-Based Rate Limiting](../../capabilities/traffic/usage-based-ratelimiting.md)** - Configure token-based rate limiting and cost controls -- **[Provider Fallback](../../capabilities/traffic/provider-fallback.md)** - Set up automatic failover between AWS Bedrock and other Anthropic providers -- **[Metrics and Monitoring](../../capabilities/observability/metrics.md)** - Monitor usage, costs, and performance metrics -- **[Model Virtualization](../../capabilities/traffic/model-virtualization.md)** - Create virtual model names that route to different backends - -## References - -- [AWS Bedrock Anthropic Models Documentation](https://aws.amazon.com/bedrock/anthropic/) -- [Anthropic API Reference](https://docs.anthropic.com/en/api) -- [AWS Bedrock Model IDs](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) -- [AIGatewayRoute API Reference](../../api/api.mdx#aigatewayrouterule) From 060f83e3f2afd67e0ee6975e66ec67d8879aecbc Mon Sep 17 00:00:00 2001 From: secustor Date: Thu, 23 Oct 2025 22:12:15 +0200 Subject: [PATCH 03/15] docs: merge aws and aws anthropic docs Signed-off-by: secustor --- examples/basic/aws-bedrock-anthropic.yaml | 93 ------ examples/basic/aws.yaml | 37 +++ .../aws-bedrock-anthropic.md | 305 ------------------ .../connect-providers/aws-bedrock.md | 224 ++++++++++++- 4 files changed, 253 insertions(+), 406 deletions(-) delete mode 100644 examples/basic/aws-bedrock-anthropic.yaml delete mode 100644 site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md diff --git a/examples/basic/aws-bedrock-anthropic.yaml b/examples/basic/aws-bedrock-anthropic.yaml deleted file mode 100644 index b2db5df483..0000000000 --- a/examples/basic/aws-bedrock-anthropic.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright Envoy AI Gateway Authors -# SPDX-License-Identifier: Apache-2.0 -# The full text of the Apache license is available in the LICENSE file at -# the root of the repo. 
- -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: AIGatewayRoute -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - parentRefs: - - name: envoy-ai-gateway-basic - kind: Gateway - group: gateway.networking.k8s.io - rules: - - matches: - - headers: - - type: Exact - name: x-ai-eg-model - value: anthropic.claude-3-5-sonnet-20241022-v2:0 - backendRefs: - - name: envoy-ai-gateway-basic-aws-bedrock-anthropic ---- -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: AIServiceBackend -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - schema: - name: AWSAnthropic - version: bedrock-2023-05-31 - backendRef: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - kind: Backend - group: gateway.envoyproxy.io ---- -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: BackendSecurityPolicy -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials - namespace: default -spec: - targetRefs: - - group: aigateway.envoyproxy.io - kind: AIServiceBackend - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - type: AWSCredentials - awsCredentials: - region: us-east-1 - credentialsFile: - secretRef: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials ---- -apiVersion: gateway.envoyproxy.io/v1alpha1 -kind: Backend -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - endpoints: - - fqdn: - hostname: bedrock-runtime.us-east-1.amazonaws.com - port: 443 ---- -apiVersion: gateway.networking.k8s.io/v1alpha3 -kind: BackendTLSPolicy -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-tls - namespace: default -spec: - targetRefs: - - group: "gateway.envoyproxy.io" - kind: Backend - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - validation: - wellKnownCACertificates: "System" - hostname: bedrock-runtime.us-east-1.amazonaws.com ---- -apiVersion: v1 -kind: Secret -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials - namespace: default -type: Opaque -stringData: - # Replace this with your AWS credentials. - # You can also use AWS IAM roles for service accounts (IRSA) in EKS. 
- credentials: | - [default] - aws_access_key_id = AWS_ACCESS_KEY_ID - aws_secret_access_key = AWS_SECRET_ACCESS_KEY diff --git a/examples/basic/aws.yaml b/examples/basic/aws.yaml index 7bc37a4b2b..784972326c 100644 --- a/examples/basic/aws.yaml +++ b/examples/basic/aws.yaml @@ -23,6 +23,25 @@ spec: - name: envoy-ai-gateway-basic-aws --- apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend metadata: name: envoy-ai-gateway-basic-aws @@ -36,6 +55,20 @@ spec: group: gateway.envoyproxy.io --- apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIServiceBackend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + schema: + name: AWSAnthropic + version: bedrock-2023-05-31 + backendRef: + name: envoy-ai-gateway-basic-aws + kind: Backend + group: gateway.envoyproxy.io +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: BackendSecurityPolicy metadata: name: envoy-ai-gateway-basic-aws-credentials @@ -45,6 +78,9 @@ spec: - group: aigateway.envoyproxy.io kind: AIServiceBackend name: envoy-ai-gateway-basic-aws + - group: aigateway.envoyproxy.io + kind: AIServiceBackend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic type: AWSCredentials awsCredentials: region: us-east-1 @@ -85,6 +121,7 @@ metadata: type: Opaque stringData: # Replace this with your AWS credentials. + # You can also use AWS IAM roles for service accounts (IRSA) in EKS. credentials: | [default] aws_access_key_id = AWS_ACCESS_KEY_ID diff --git a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md b/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md deleted file mode 100644 index cd5e031304..0000000000 --- a/site/docs/getting-started/connect-providers/aws-bedrock-anthropic.md +++ /dev/null @@ -1,305 +0,0 @@ ---- -id: aws-bedrock-anthropic -title: Connect AWS Bedrock (Anthropic Native API) -sidebar_position: 4 ---- - -# Connect AWS Bedrock with Anthropic Native API - -This guide shows you how to configure Envoy AI Gateway to use Anthropic models on AWS Bedrock with the **native Anthropic Messages API format**. This allows you to use the `/anthropic/v1/messages` endpoint to call Claude models hosted on AWS Bedrock. - -> [!NOTE] -> If you want to use AWS Bedrock models with the OpenAI-compatible format (`/v1/chat/completions`), see the [AWS Bedrock guide](./aws-bedrock.md) instead. - -## Prerequisites - -Before you begin, you'll need: - -- AWS credentials with access to Bedrock -- Basic setup completed from the [Basic Usage](../basic-usage.md) guide -- Basic configuration removed as described in the [Advanced Configuration](./index.md) overview -- Model access enabled for Anthropic Claude models in your AWS region - -## AWS Credentials Setup - -Ensure you have: - -1. An AWS account with Bedrock access enabled -2. AWS credentials with permissions to: - - `bedrock:InvokeModel` - - `bedrock:ListFoundationModels` - - `aws-marketplace:ViewSubscriptions` -3. Your AWS access key ID and secret access key -4. 
Enabled model access to Anthropic Claude models in your desired AWS region (e.g., `us-east-1`) - - Go to the AWS Bedrock console and request access to Anthropic models - - If you want to use a different AWS region, you must update all instances of `us-east-1` with the desired region in the configuration file - -> [!TIP] -> Consider using AWS IAM roles and limited-scope credentials for production environments. For EKS clusters, AWS IAM Roles for Service Accounts (IRSA) is recommended. - -## Why Use the Native Anthropic API? - -The native Anthropic API provides several advantages when working with Claude models: - -- **Full feature support**: Access all Anthropic-specific features like extended thinking, prompt caching, and tool use -- **Consistent API**: Use the same API format you would with Anthropic's direct API -- **Better compatibility**: Avoid potential translation issues between OpenAI and Anthropic formats -- **Feature parity**: Get immediate access to new Anthropic features as they're released - -## Configuration Steps - -> [!IMPORTANT] -> Ensure you have followed the prerequisite steps in [Connect Providers](../connect-providers/) before proceeding. - -### 1. Download Configuration Template - -```shell -curl -O https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/aws-bedrock-anthropic.yaml -``` - -### 2. Configure AWS Credentials - -Edit the `aws-bedrock-anthropic.yaml` file to replace these placeholder values: - -- `AWS_ACCESS_KEY_ID`: Your AWS access key ID -- `AWS_SECRET_ACCESS_KEY`: Your AWS secret access key -- Update the `region` field if you're using a region other than `us-east-1` -- Update the model ID in the `value` field if you want to use a different Claude model - -> [!CAUTION] -> Make sure to keep your AWS credentials secure and never commit them to version control. The credentials will be stored in Kubernetes secrets. - -### 3. Apply Configuration - -Apply the updated configuration and wait for the Gateway pod to be ready. If you already have a Gateway running, the secret credential update will be picked up automatically in a few seconds. - -```shell -kubectl apply -f aws-bedrock-anthropic.yaml - -kubectl wait pods --timeout=2m \ - -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ - -n envoy-gateway-system \ - --for=condition=Ready -``` - -### 4. Test the Configuration - -You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. See the [Basic Usage](../basic-usage.md) page for instructions. - -Test your configuration using the native Anthropic Messages API format: - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 100 - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -Expected output: - -```json -{ - "id": "msg_01XFDUDYJgAACzvnptvVoYEL", - "type": "message", - "role": "assistant", - "content": [ - { - "type": "text", - "text": "The capital of France is Paris." - } - ], - "model": "claude-3-5-sonnet-20241022", - "stop_reason": "end_turn", - "usage": { - "input_tokens": 13, - "output_tokens": 8 - } -} -``` - -### 5. Test Streaming - -The native Anthropic API also supports streaming responses: - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "Count from 1 to 5." 
- } - ], - "max_tokens": 100, - "stream": true - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - - -## Configuring More Models - -To use additional models, add more `AIGatewayRoute` rules to the configuration file. Each rule should specify a different model ID: - -```yaml -apiVersion: aigateway.envoyproxy.io/v1alpha1 -kind: AIGatewayRoute -metadata: - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - namespace: default -spec: - parentRefs: - - name: envoy-ai-gateway-basic - kind: Gateway - group: gateway.networking.k8s.io - rules: - # Claude 3.5 Sonnet (Oct 2024) - - matches: - - headers: - - type: Exact - name: x-ai-eg-model - value: anthropic.claude-3-5-sonnet-20241022-v2:0 - backendRefs: - - name: envoy-ai-gateway-basic-aws-bedrock-anthropic - # Claude 3 Opus - - matches: - - headers: - - type: Exact - name: x-ai-eg-model - value: anthropic.claude-3-opus-20240229-v1:0 - backendRefs: - - name: envoy-ai-gateway-basic-aws-bedrock-anthropic -``` - -## Advanced Features - -### Using Anthropic-Specific Features - -Since this configuration uses the native Anthropic API, you have full access to Anthropic-specific features: - -#### Extended Thinking - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" - } - ], - "max_tokens": 1000, - "thinking": { - "type": "enabled", - "budget_tokens": 5000 - } - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -#### Prompt Caching - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "system": [ - { - "type": "text", - "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", - "cache_control": {"type": "ephemeral"} - } - ], - "messages": [ - { - "role": "user", - "content": "Write a function to calculate fibonacci numbers." - } - ], - "max_tokens": 500 - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -#### Tool Use (Function Calling) - -```shell -curl -H "Content-Type: application/json" \ - -d '{ - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [ - { - "role": "user", - "content": "What is the weather in San Francisco?" - } - ], - "max_tokens": 500, - "tools": [ - { - "name": "get_weather", - "description": "Get the current weather in a given location", - "input_schema": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA" - } - }, - "required": ["location"] - } - } - ] - }' \ - $GATEWAY_URL/anthropic/v1/messages -``` - -## Troubleshooting - -If you encounter issues: - -1. **Verify your AWS credentials are correct and active** - - ```shell - # Check if credentials are properly configured - kubectl get secret envoy-ai-gateway-basic-aws-bedrock-anthropic-credentials -n default -o yaml - ``` - -2. **Check pod status** - - ```shell - kubectl get pods -n envoy-gateway-system - ``` - -3. **View controller logs** - - ```shell - kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller - ``` - -4. 
**View gateway pod logs**
-
-   ```shell
-   kubectl logs -n envoy-gateway-system -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic
-   ```
-
-### Common Errors
-
-| Error Code | Issue                                           | Solution                                                              |
-| ---------- | ----------------------------------------------- | -------------------------------------------------------------------- |
-| 401/403    | Invalid credentials or insufficient permissions | Verify AWS credentials and ensure Bedrock permissions are granted    |
-| 404        | Model not found or not available in region      | Check model ID and ensure model access is enabled in your AWS region |
-| 429        | Rate limit exceeded                             | Implement rate limiting or request quota increase from AWS           |
-| 400        | Invalid request format                          | Verify request body matches Anthropic API format                     |
-| 500        | AWS Bedrock internal error                      | Check AWS Bedrock service status and retry after a short delay       |
-
diff --git a/site/docs/getting-started/connect-providers/aws-bedrock.md b/site/docs/getting-started/connect-providers/aws-bedrock.md
index 680af63a1d..91ed0e4389 100644
--- a/site/docs/getting-started/connect-providers/aws-bedrock.md
+++ b/site/docs/getting-started/connect-providers/aws-bedrock.md
@@ -6,7 +6,7 @@ sidebar_position: 3
 
 # Connect AWS Bedrock
 
-This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models.
+This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models, including Llama and Anthropic Claude.
 
 ## Prerequisites
 
@@ -24,6 +24,7 @@ Ensure you have:
 2. AWS credentials with permissions to:
    - `bedrock:InvokeModel`
    - `bedrock:ListFoundationModels`
+   - `aws-marketplace:ViewSubscriptions` (for Anthropic models)
 3. Your AWS access key ID and secret access key
 4. Enabled model access to "Llama 3.2 1B Instruct" in the `us-east-1` region
    - If you want to use a different AWS region, you must update all instances of the string
@@ -76,6 +77,8 @@ kubectl wait pods --timeout=2m \
 
 You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. See the [Basic Usage](../basic-usage.md) page for instructions.
 
+To access a Llama model with the chat completions endpoint:
+
 ```shell
 curl -H "Content-Type: application/json" \
   -d '{
@@ -90,23 +93,119 @@ curl -H "Content-Type: application/json" \
     $GATEWAY_URL/v1/chat/completions
 ```
 
+To access an Anthropic model with the chat completions endpoint:
+
+```shell
+curl -H "Content-Type: application/json" \
+  -d '{
+        "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "messages": [
+          {
+            "role": "user",
+            "content": "What is the capital of France?"
+          }
+        ],
+        "max_completion_tokens": 100
+      }' \
+    $GATEWAY_URL/v1/chat/completions
+```
+
+Expected output:
+
+```json
+{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "The capital of France is Paris.",
+        "role": "assistant"
+      }
+    }
+  ],
+  "object": "chat.completion",
+  "usage": { "completion_tokens": 8, "prompt_tokens": 13, "total_tokens": 21 }
+}
+```
+
+You can also access an Anthropic model with the native Anthropic Messages endpoint:
+
+```shell
+curl -H "Content-Type: application/json" \
+  -d '{
+        "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "messages": [
+          {
+            "role": "user",
+            "content": "What is the capital of France?"
+ } + ], + "max_tokens": 100 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +Expected output: + +```json +{ + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "The capital of France is Paris." + } + ], + "model": "claude-3-5-sonnet-20241022", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 13, + "output_tokens": 8 + } +} +``` + ## Troubleshooting If you encounter issues: -1. Verify your AWS credentials are correct and active -2. Check pod status: +1. **Verify your AWS credentials are correct and active** + + ```shell + # Check if credentials are properly configured + kubectl get secret -n default -o yaml + ``` + +2. **Check pod status** + ```shell kubectl get pods -n envoy-gateway-system ``` -3. View controller logs: + +3. **View controller logs** + ```shell kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller ``` -4. Common errors: - - 401/403: Invalid credentials or insufficient permissions - - 404: Model not found or not available in region - - 429: Rate limit exceeded + +4. **View gateway pod logs** + + ```shell + kubectl logs -n envoy-gateway-system -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic + ``` + +### Common Errors + +| Error Code | Issue | Solution | +| ---------- | ----------------------------------------------- | -------------------------------------------------------------------- | +| 401/403 | Invalid credentials or insufficient permissions | Verify AWS credentials and ensure Bedrock permissions are granted | +| 404 | Model not found or not available in region | Check model ID and ensure model access is enabled in your AWS region | +| 429 | Rate limit exceeded | Implement rate limiting or request quota increase from AWS | +| 400 | Invalid request format | Verify request body matches the expected API format | +| 500 | AWS Bedrock internal error | Check AWS Bedrock service status and retry after a short delay | ## Configuring More Models @@ -133,6 +232,115 @@ spec: - name: envoy-ai-gateway-basic-aws ``` +## Using Anthropic Native API + +When using Anthropic models on AWS Bedrock, you have two options: + +1. **OpenAI-compatible format** (`/v1/chat/completions`) - Works with most models but may not support all Anthropic-specific features +2. **Native Anthropic API** (`/anthropic/v1/messages`) - Provides full access to Anthropic-specific features (only for Anthropic models) + +### Streaming with Native Anthropic API + +The native Anthropic API also supports streaming responses: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 5." + } + ], + "max_tokens": 100, + "stream": true + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Advanced Features with Anthropic Models + +Since the gateway supports the native Anthropic API, you have full access to Anthropic-specific features: + +### Extended Thinking + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" 
+ } + ], + "max_tokens": 1000, + "thinking": { + "type": "enabled", + "budget_tokens": 5000 + } + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +### Prompt Caching + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "system": [ + { + "type": "text", + "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", + "cache_control": {"type": "ephemeral"} + } + ], + "messages": [ + { + "role": "user", + "content": "Write a function to calculate fibonacci numbers." + } + ], + "max_tokens": 500 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +### Tool Use (Function Calling) + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + } + ], + "max_tokens": 500, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } + ] + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + [AIGatewayRouteRule]: ../../api/api.mdx#aigatewayrouterule [model ID]: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html [Claude 3 Sonnet]: https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table From 460c72e67b330a4f408113755b6641f209089f8f Mon Sep 17 00:00:00 2001 From: secustor Date: Thu, 23 Oct 2025 23:33:15 +0200 Subject: [PATCH 04/15] refactor: extract ResponseHandler and applyModelNameOverride Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 94 +----------- .../translator/anthropic_gcpanthropic.go | 94 +----------- .../extproc/translator/anthropic_helper.go | 143 ++++++++++++++++++ 3 files changed, 155 insertions(+), 176 deletions(-) create mode 100644 internal/extproc/translator/anthropic_helper.go diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 735c9a8119..6968fd731c 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,14 +6,12 @@ package translator import ( - "bytes" "encoding/json" "fmt" "io" "maps" "net/url" - "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -27,14 +25,15 @@ func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToAWSAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, + responseHandler: newAnthropicResponseHandler(), } } type anthropicToAWSAnthropicTranslator struct { - // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel + responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. @@ -50,10 +49,7 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. 
- a.requestModel = modelName - if a.modelNameOverride != "" { - a.requestModel = a.modelNameOverride - } + a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). delete(anthropicReq, "model") @@ -98,87 +94,9 @@ func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. -// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( +// This delegates to the shared anthropicResponseHandler since AWS Bedrock returns the native Anthropic response format. +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - // Read the response body for both streaming and non-streaming. - bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if bytes.HasPrefix(line, dataPrefix) { - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. - if messageData, ok := eventData["message"].(map[string]any); ok { - if usageData, ok := messageData["usage"].(map[string]any); ok { - if inputTokens, ok := usageData["input_tokens"].(float64); ok { - tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec - } - // Some message_start events may include initial output tokens. - if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { - tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec - } - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - - case "message_delta": - if usageData, ok := eventData["usage"].(map[string]any); ok { - if outputTokens, ok := usageData["output_tokens"].(float64); ok { - // Add to existing output tokens (in case message_start had some initial ones). - tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - } - } - } - } - - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, a.requestModel, nil - } - - // Parse the Anthropic response to extract token usage. - var anthropicResp anthropic.Message - if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, a.requestModel, nil - } - - // Extract token usage from the response. 
- tokenUsage = LLMTokenUsage{ - InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec - OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec - TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec - CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec - } - - // Pass through the response body unchanged since both input and output are Anthropic format. - headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil + return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) } diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index 37a5d4a5a3..f5a8bbc799 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -6,13 +6,11 @@ package translator import ( - "bytes" "encoding/json" "fmt" "io" "maps" - "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -25,14 +23,15 @@ func NewAnthropicToGCPAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToGCPAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, + responseHandler: newAnthropicResponseHandler(), } } type anthropicToGCPAnthropicTranslator struct { - // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel + responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to GCP Anthropic translation. @@ -48,10 +47,7 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. - a.requestModel = modelName - if a.modelNameOverride != "" { - a.requestModel = a.modelNameOverride - } + a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) // Remove the model field since GCP doesn't want it in the body. delete(anthropicReq, "model") @@ -90,87 +86,9 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to GCP Anthropic. -// This is essentially a passthrough since both use the same Anthropic response format. -func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( +// This delegates to the shared anthropicResponseHandler since GCP Vertex AI returns the native Anthropic response format. +func (a *anthropicToGCPAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - // Read the response body for both streaming and non-streaming. 
- bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if bytes.HasPrefix(line, dataPrefix) { - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. - if messageData, ok := eventData["message"].(map[string]any); ok { - if usageData, ok := messageData["usage"].(map[string]any); ok { - if inputTokens, ok := usageData["input_tokens"].(float64); ok { - tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec - } - // Some message_start events may include initial output tokens. - if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { - tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec - } - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - - case "message_delta": - if usageData, ok := eventData["usage"].(map[string]any); ok { - if outputTokens, ok := usageData["output_tokens"].(float64); ok { - // Add to existing output tokens (in case message_start had some initial ones). - tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - } - } - } - } - - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, a.requestModel, nil - } - - // Parse the Anthropic response to extract token usage. - var anthropicResp anthropic.Message - if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, a.requestModel, nil - } - - // Extract token usage from the response. - tokenUsage = LLMTokenUsage{ - InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec - OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec - TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec - CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec - } - - // Pass through the response body unchanged since both input and output are Anthropic format. 
- headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil + return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go new file mode 100644 index 0000000000..44cdc08d09 --- /dev/null +++ b/internal/extproc/translator/anthropic_helper.go @@ -0,0 +1,143 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + + "github.com/anthropics/anthropic-sdk-go" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + + "github.com/envoyproxy/ai-gateway/internal/internalapi" +) + +// anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. +// This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. +type anthropicResponseHandler struct{} + +// newAnthropicResponseHandler creates a new stateless response handler. +func newAnthropicResponseHandler() *anthropicResponseHandler { + return &anthropicResponseHandler{} +} + +// ResponseBody handles both streaming and non-streaming Anthropic API responses. +// It extracts token usage information and returns the response unchanged (passthrough). +// The requestModel parameter is used to populate the responseModel return value. +func (h *anthropicResponseHandler) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool, requestModel internalapi.RequestModel) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, +) { + // Read the response body for both streaming and non-streaming. + bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + tokenUsage = h.extractTokenUsageFromSSE(bodyBytes) + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, requestModel, nil + } + + // For non-streaming responses, parse the complete Anthropic response. + tokenUsage, err = h.extractTokenUsageFromResponse(bodyBytes) + if err != nil { + // If we can't parse as Anthropic format, pass through as-is. + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, requestModel, nil + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, requestModel, nil +} + +// extractTokenUsageFromSSE parses SSE (Server-Sent Events) format streaming responses +// to extract token usage information from message_start and message_delta events. 
+func (h *anthropicResponseHandler) extractTokenUsageFromSSE(bodyBytes []byte) LLMTokenUsage { + var tokenUsage LLMTokenUsage + + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if !bytes.HasPrefix(line, dataPrefix) { + continue + } + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + + return tokenUsage +} + +// extractTokenUsageFromResponse parses a complete (non-streaming) Anthropic response +// to extract token usage information. +func (h *anthropicResponseHandler) extractTokenUsageFromResponse(bodyBytes []byte) (LLMTokenUsage, error) { + var anthropicResp anthropic.Message + if err := json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + return LLMTokenUsage{}, err + } + + tokenUsage := LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + return tokenUsage, nil +} + +// applyModelNameOverride applies model name override logic used by AWS and GCP translators. 
+func applyModelNameOverride(originalModel internalapi.RequestModel, override internalapi.ModelNameOverride) internalapi.RequestModel { + if override != "" { + return override + } + return originalModel +} From bf5ded23e9d2db78c56be5d0a8a9871e3281755f Mon Sep 17 00:00:00 2001 From: secustor Date: Fri, 24 Oct 2025 10:11:38 +0200 Subject: [PATCH 05/15] refactor: extract Anthropic request header generation for AWS and GCP Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 17 ++++--------- .../translator/anthropic_gcpanthropic.go | 17 ++++--------- .../translator/anthropic_gcpanthropic_test.go | 3 ++- .../extproc/translator/anthropic_helper.go | 24 +++++++++++++++++++ .../extproc/translator/openai_gcpanthropic.go | 1 - 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 6968fd731c..8e561352bb 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "io" - "maps" "net/url" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -44,22 +43,14 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for AWS Bedrock endpoint from the parsed request. modelName := body.GetModel() - // Work directly with the map since MessagesRequest is already map[string]interface{}. - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). - delete(anthropicReq, "model") - - // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). - // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). - if a.apiVersion == "" { - return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") + // Prepare the request body (removes model field, adds anthropic_version). + anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to prepare request for AWS Bedrock: %w", err) } - anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index f5a8bbc799..9bb8c07672 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "io" - "maps" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -42,22 +41,14 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for GCP endpoint from the parsed request. modelName := body.GetModel() - // Work directly with the map since MessagesRequest is already map[string]interface{}. - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Remove the model field since GCP doesn't want it in the body. 
- delete(anthropicReq, "model") - - // Add GCP-specific anthropic_version field (required by GCP Vertex AI). - // Uses backend config version (e.g., "vertex-2023-10-16" for GCP Vertex AI). - if a.apiVersion == "" { - return nil, nil, fmt.Errorf("anthropic_version is required for GCP Vertex AI but not provided in backend configuration") + // Prepare the request body (removes model field, adds anthropic_version). + anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to prepare request for GCP Vertex AI: %w", err) } - anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic_test.go b/internal/extproc/translator/anthropic_gcpanthropic_test.go index 2c882399ba..2706ff57ff 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic_test.go +++ b/internal/extproc/translator/anthropic_gcpanthropic_test.go @@ -234,7 +234,8 @@ func TestAnthropicToGCPAnthropicTranslator_BackendVersionHandling(t *testing.T) if tt.shouldError { require.Error(t, err) - require.Contains(t, err.Error(), "anthropic_version is required for GCP Vertex AI") + require.Contains(t, err.Error(), "anthropic_version is required") + require.Contains(t, err.Error(), "GCP Vertex AI") return } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go index 44cdc08d09..667fe911f7 100644 --- a/internal/extproc/translator/anthropic_helper.go +++ b/internal/extproc/translator/anthropic_helper.go @@ -10,13 +10,19 @@ import ( "encoding/json" "fmt" "io" + "maps" "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" "github.com/envoyproxy/ai-gateway/internal/internalapi" ) +const ( + anthropicVersionKey = "anthropic_version" +) + // anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. // This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. type anthropicResponseHandler struct{} @@ -141,3 +147,21 @@ func applyModelNameOverride(originalModel internalapi.RequestModel, override int } return originalModel } + +// prepareAnthropicRequest prepares the request body for cloud providers (AWS/GCP) +// The anthropic_version field is required by cloud provider implementations. 
+func prepareAnthropicRequest(body *anthropicschema.MessagesRequest, apiVersion string) (map[string]any, error) { + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + + // Remove model field - cloud providers use it in the URL path instead + delete(anthropicReq, "model") + + // Add required anthropic_version field + if apiVersion == "" { + return nil, fmt.Errorf("anthropic_version is required but not provided in backend configuration") + } + anthropicReq[anthropicVersionKey] = apiVersion + + return anthropicReq, nil +} diff --git a/internal/extproc/translator/openai_gcpanthropic.go b/internal/extproc/translator/openai_gcpanthropic.go index f089595f5d..31d2fb8766 100644 --- a/internal/extproc/translator/openai_gcpanthropic.go +++ b/internal/extproc/translator/openai_gcpanthropic.go @@ -29,7 +29,6 @@ import ( // currently a requirement for GCP Vertex / Anthropic API https://docs.anthropic.com/en/api/claude-on-vertex-ai const ( - anthropicVersionKey = "anthropic_version" gcpBackendError = "GCPBackendError" tempNotSupportedError = "temperature %.2f is not supported by Anthropic (must be between 0.0 and 1.0)" ) From 729a190993319c704e7bffd03a7ab99c86791acf Mon Sep 17 00:00:00 2001 From: secustor Date: Fri, 24 Oct 2025 18:17:23 +0200 Subject: [PATCH 06/15] Revert "refactor: extract Anthropic request header generation for AWS and GCP" This reverts commit bf5ded23e9d2db78c56be5d0a8a9871e3281755f. Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 17 +++++++++---- .../translator/anthropic_gcpanthropic.go | 17 +++++++++---- .../translator/anthropic_gcpanthropic_test.go | 3 +-- .../extproc/translator/anthropic_helper.go | 24 ------------------- .../extproc/translator/openai_gcpanthropic.go | 1 + 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 8e561352bb..6968fd731c 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "net/url" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -43,14 +44,22 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for AWS Bedrock endpoint from the parsed request. modelName := body.GetModel() + // Work directly with the map since MessagesRequest is already map[string]interface{}. + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Prepare the request body (removes model field, adds anthropic_version). - anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) - if err != nil { - return nil, nil, fmt.Errorf("failed to prepare request for AWS Bedrock: %w", err) + // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). + delete(anthropicReq, "model") + + // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). + // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). + if a.apiVersion == "" { + return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") } + anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. 
mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index 9bb8c07672..f5a8bbc799 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "io" + "maps" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -41,14 +42,22 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi // Extract model name for GCP endpoint from the parsed request. modelName := body.GetModel() + // Work directly with the map since MessagesRequest is already map[string]interface{}. + anthropicReq := make(map[string]any) + maps.Copy(anthropicReq, *body) + // Apply model name override if configured. a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) - // Prepare the request body (removes model field, adds anthropic_version). - anthropicReq, err := prepareAnthropicRequest(body, a.apiVersion) - if err != nil { - return nil, nil, fmt.Errorf("failed to prepare request for GCP Vertex AI: %w", err) + // Remove the model field since GCP doesn't want it in the body. + delete(anthropicReq, "model") + + // Add GCP-specific anthropic_version field (required by GCP Vertex AI). + // Uses backend config version (e.g., "vertex-2023-10-16" for GCP Vertex AI). + if a.apiVersion == "" { + return nil, nil, fmt.Errorf("anthropic_version is required for GCP Vertex AI but not provided in backend configuration") } + anthropicReq[anthropicVersionKey] = a.apiVersion // Marshal the modified request. mutatedBody, err := json.Marshal(anthropicReq) diff --git a/internal/extproc/translator/anthropic_gcpanthropic_test.go b/internal/extproc/translator/anthropic_gcpanthropic_test.go index 2706ff57ff..2c882399ba 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic_test.go +++ b/internal/extproc/translator/anthropic_gcpanthropic_test.go @@ -234,8 +234,7 @@ func TestAnthropicToGCPAnthropicTranslator_BackendVersionHandling(t *testing.T) if tt.shouldError { require.Error(t, err) - require.Contains(t, err.Error(), "anthropic_version is required") - require.Contains(t, err.Error(), "GCP Vertex AI") + require.Contains(t, err.Error(), "anthropic_version is required for GCP Vertex AI") return } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go index 667fe911f7..44cdc08d09 100644 --- a/internal/extproc/translator/anthropic_helper.go +++ b/internal/extproc/translator/anthropic_helper.go @@ -10,19 +10,13 @@ import ( "encoding/json" "fmt" "io" - "maps" "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" - anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" "github.com/envoyproxy/ai-gateway/internal/internalapi" ) -const ( - anthropicVersionKey = "anthropic_version" -) - // anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. // This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. type anthropicResponseHandler struct{} @@ -147,21 +141,3 @@ func applyModelNameOverride(originalModel internalapi.RequestModel, override int } return originalModel } - -// prepareAnthropicRequest prepares the request body for cloud providers (AWS/GCP) -// The anthropic_version field is required by cloud provider implementations. 
-func prepareAnthropicRequest(body *anthropicschema.MessagesRequest, apiVersion string) (map[string]any, error) { - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - - // Remove model field - cloud providers use it in the URL path instead - delete(anthropicReq, "model") - - // Add required anthropic_version field - if apiVersion == "" { - return nil, fmt.Errorf("anthropic_version is required but not provided in backend configuration") - } - anthropicReq[anthropicVersionKey] = apiVersion - - return anthropicReq, nil -} diff --git a/internal/extproc/translator/openai_gcpanthropic.go b/internal/extproc/translator/openai_gcpanthropic.go index 31d2fb8766..f089595f5d 100644 --- a/internal/extproc/translator/openai_gcpanthropic.go +++ b/internal/extproc/translator/openai_gcpanthropic.go @@ -29,6 +29,7 @@ import ( // currently a requirement for GCP Vertex / Anthropic API https://docs.anthropic.com/en/api/claude-on-vertex-ai const ( + anthropicVersionKey = "anthropic_version" gcpBackendError = "GCPBackendError" tempNotSupportedError = "temperature %.2f is not supported by Anthropic (must be between 0.0 and 1.0)" ) From 46505bc15d48d908acbcb8b23af7760a2ba01c89 Mon Sep 17 00:00:00 2001 From: secustor Date: Fri, 24 Oct 2025 18:17:24 +0200 Subject: [PATCH 07/15] Revert "refactor: extract ResponseHandler and applyModelNameOverride" This reverts commit 460c72e67b330a4f408113755b6641f209089f8f. Signed-off-by: secustor --- .../translator/anthropic_awsanthropic.go | 94 +++++++++++- .../translator/anthropic_gcpanthropic.go | 94 +++++++++++- .../extproc/translator/anthropic_helper.go | 143 ------------------ 3 files changed, 176 insertions(+), 155 deletions(-) delete mode 100644 internal/extproc/translator/anthropic_helper.go diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 6968fd731c..735c9a8119 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,12 +6,14 @@ package translator import ( + "bytes" "encoding/json" "fmt" "io" "maps" "net/url" + "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -25,15 +27,14 @@ func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToAWSAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, - responseHandler: newAnthropicResponseHandler(), } } type anthropicToAWSAnthropicTranslator struct { + // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel - responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. @@ -49,7 +50,10 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. - a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) + a.requestModel = modelName + if a.modelNameOverride != "" { + a.requestModel = a.modelNameOverride + } // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). 
delete(anthropicReq, "model") @@ -94,9 +98,87 @@ func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. -// This delegates to the shared anthropicResponseHandler since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( +// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) + // Read the response body for both streaming and non-streaming. + bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if bytes.HasPrefix(line, dataPrefix) { + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + } + + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, a.requestModel, nil + } + + // Parse the Anthropic response to extract token usage. + var anthropicResp anthropic.Message + if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + // If we can't parse as Anthropic format, pass through as-is. + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, a.requestModel, nil + } + + // Extract token usage from the response. 
+ tokenUsage = LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil } diff --git a/internal/extproc/translator/anthropic_gcpanthropic.go b/internal/extproc/translator/anthropic_gcpanthropic.go index f5a8bbc799..37a5d4a5a3 100644 --- a/internal/extproc/translator/anthropic_gcpanthropic.go +++ b/internal/extproc/translator/anthropic_gcpanthropic.go @@ -6,11 +6,13 @@ package translator import ( + "bytes" "encoding/json" "fmt" "io" "maps" + "github.com/anthropics/anthropic-sdk-go" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" @@ -23,15 +25,14 @@ func NewAnthropicToGCPAnthropicTranslator(apiVersion string, modelNameOverride i return &anthropicToGCPAnthropicTranslator{ apiVersion: apiVersion, modelNameOverride: modelNameOverride, - responseHandler: newAnthropicResponseHandler(), } } type anthropicToGCPAnthropicTranslator struct { + // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. apiVersion string modelNameOverride internalapi.ModelNameOverride requestModel internalapi.RequestModel - responseHandler *anthropicResponseHandler } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to GCP Anthropic translation. @@ -47,7 +48,10 @@ func (a *anthropicToGCPAnthropicTranslator) RequestBody(_ []byte, body *anthropi maps.Copy(anthropicReq, *body) // Apply model name override if configured. - a.requestModel = applyModelNameOverride(modelName, a.modelNameOverride) + a.requestModel = modelName + if a.modelNameOverride != "" { + a.requestModel = a.modelNameOverride + } // Remove the model field since GCP doesn't want it in the body. delete(anthropicReq, "model") @@ -86,9 +90,87 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseHeaders(_ map[string]string) } // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to GCP Anthropic. -// This delegates to the shared anthropicResponseHandler since GCP Vertex AI returns the native Anthropic response format. -func (a *anthropicToGCPAnthropicTranslator) ResponseBody(headers map[string]string, body io.Reader, endOfStream bool) ( +// This is essentially a passthrough since both use the same Anthropic response format. +func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - return a.responseHandler.ResponseBody(headers, body, endOfStream, a.requestModel) + // Read the response body for both streaming and non-streaming. 
+ bodyBytes, err := io.ReadAll(body) + if err != nil { + return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) + } + + // For streaming chunks, parse SSE format to extract token usage. + if !endOfStream { + // Parse SSE format - split by lines and look for data: lines. + for line := range bytes.Lines(bodyBytes) { + line = bytes.TrimSpace(line) + if bytes.HasPrefix(line, dataPrefix) { + jsonData := bytes.TrimPrefix(line, dataPrefix) + + var eventData map[string]any + if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { + // Skip lines with invalid JSON (like ping events or malformed data). + continue + } + if eventType, ok := eventData["type"].(string); ok { + switch eventType { + case "message_start": + // Extract input tokens from message.usage. + if messageData, ok := eventData["message"].(map[string]any); ok { + if usageData, ok := messageData["usage"].(map[string]any); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec + } + // Some message_start events may include initial output tokens. + if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { + tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec + } + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + + case "message_delta": + if usageData, ok := eventData["usage"].(map[string]any); ok { + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + // Add to existing output tokens (in case message_start had some initial ones). + tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec + tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens + } + } + } + } + } + } + + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, tokenUsage, a.requestModel, nil + } + + // Parse the Anthropic response to extract token usage. + var anthropicResp anthropic.Message + if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { + // If we can't parse as Anthropic format, pass through as-is. + return nil, &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + }, LLMTokenUsage{}, a.requestModel, nil + } + + // Extract token usage from the response. + tokenUsage = LLMTokenUsage{ + InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec + OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec + TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec + CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec + } + + // Pass through the response body unchanged since both input and output are Anthropic format. + headerMutation = &extprocv3.HeaderMutation{} + setContentLength(headerMutation, bodyBytes) + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, + } + + return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil } diff --git a/internal/extproc/translator/anthropic_helper.go b/internal/extproc/translator/anthropic_helper.go deleted file mode 100644 index 44cdc08d09..0000000000 --- a/internal/extproc/translator/anthropic_helper.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright Envoy AI Gateway Authors -// SPDX-License-Identifier: Apache-2.0 -// The full text of the Apache license is available in the LICENSE file at -// the root of the repo. 
- -package translator - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - - "github.com/anthropics/anthropic-sdk-go" - extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" - - "github.com/envoyproxy/ai-gateway/internal/internalapi" -) - -// anthropicResponseHandler provides shared response handling logic for Anthropic-compatible APIs. -// This handler is stateless and used by AWS Bedrock and GCP Vertex AI translators to avoid code duplication. -type anthropicResponseHandler struct{} - -// newAnthropicResponseHandler creates a new stateless response handler. -func newAnthropicResponseHandler() *anthropicResponseHandler { - return &anthropicResponseHandler{} -} - -// ResponseBody handles both streaming and non-streaming Anthropic API responses. -// It extracts token usage information and returns the response unchanged (passthrough). -// The requestModel parameter is used to populate the responseModel return value. -func (h *anthropicResponseHandler) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool, requestModel internalapi.RequestModel) ( - headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, -) { - // Read the response body for both streaming and non-streaming. - bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - tokenUsage = h.extractTokenUsageFromSSE(bodyBytes) - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, requestModel, nil - } - - // For non-streaming responses, parse the complete Anthropic response. - tokenUsage, err = h.extractTokenUsageFromResponse(bodyBytes) - if err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, requestModel, nil - } - - // Pass through the response body unchanged since both input and output are Anthropic format. - headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, requestModel, nil -} - -// extractTokenUsageFromSSE parses SSE (Server-Sent Events) format streaming responses -// to extract token usage information from message_start and message_delta events. -func (h *anthropicResponseHandler) extractTokenUsageFromSSE(bodyBytes []byte) LLMTokenUsage { - var tokenUsage LLMTokenUsage - - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if !bytes.HasPrefix(line, dataPrefix) { - continue - } - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. 
-				if messageData, ok := eventData["message"].(map[string]any); ok {
-					if usageData, ok := messageData["usage"].(map[string]any); ok {
-						if inputTokens, ok := usageData["input_tokens"].(float64); ok {
-							tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec
-						}
-						// Some message_start events may include initial output tokens.
-						if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 {
-							tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec
-						}
-						tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens
-					}
-				}
-
-			case "message_delta":
-				if usageData, ok := eventData["usage"].(map[string]any); ok {
-					if outputTokens, ok := usageData["output_tokens"].(float64); ok {
-						// Add to existing output tokens (in case message_start had some initial ones).
-						tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec
-						tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens
-					}
-				}
-			}
-		}
-	}
-
-	return tokenUsage
-}
-
-// extractTokenUsageFromResponse parses a complete (non-streaming) Anthropic response
-// to extract token usage information.
-func (h *anthropicResponseHandler) extractTokenUsageFromResponse(bodyBytes []byte) (LLMTokenUsage, error) {
-	var anthropicResp anthropic.Message
-	if err := json.Unmarshal(bodyBytes, &anthropicResp); err != nil {
-		return LLMTokenUsage{}, err
-	}
-
-	tokenUsage := LLMTokenUsage{
-		InputTokens:       uint32(anthropicResp.Usage.InputTokens),                                    //nolint:gosec
-		OutputTokens:      uint32(anthropicResp.Usage.OutputTokens),                                   //nolint:gosec
-		TotalTokens:       uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec
-		CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens),                           //nolint:gosec
-	}
-
-	return tokenUsage, nil
-}
-
-// applyModelNameOverride applies model name override logic used by AWS and GCP translators.
-func applyModelNameOverride(originalModel internalapi.RequestModel, override internalapi.ModelNameOverride) internalapi.RequestModel {
-	if override != "" {
-		return override
-	}
-	return originalModel
-}

From 353087737918ff1540fc55f8a03e1964bb366755 Mon Sep 17 00:00:00 2001
From: secustor
Date: Sat, 25 Oct 2025 00:48:17 +0200
Subject: [PATCH 08/15] refactor: use anthropic to anthropic translator

Signed-off-by: secustor
---
 internal/extproc/messages_processor_test.go   |  78 +++
 .../translator/anthropic_awsanthropic.go      | 153 ++----
 .../translator/anthropic_awsanthropic_test.go | 449 +++++++++++-------
 3 files changed, 385 insertions(+), 295 deletions(-)

diff --git a/internal/extproc/messages_processor_test.go b/internal/extproc/messages_processor_test.go
index 7d0b696c4e..3c09a376da 100644
--- a/internal/extproc/messages_processor_test.go
+++ b/internal/extproc/messages_processor_test.go
@@ -826,6 +826,84 @@ func TestMessagesProcessorUpstreamFilter_ProcessRequestHeaders_WithHeaderMutatio
 		// Check that original headers remain unchanged.
 		require.Equal(t, "bearer token123", headers["authorization"])
 	})
+
+	t.Run("multiple header mutations with same key - last one wins", func(t *testing.T) {
+		headers := map[string]string{
+			":path":         "/anthropic/v1/messages",
+			"x-ai-eg-model": "anthropic.claude-3-haiku-20240307-v1:0",
+		}
+
+		// Create request body.
+ requestBody := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-haiku-20240307-v1:0", + "max_tokens": 1000, + "messages": []any{map[string]any{"role": "user", "content": "Hello"}}, + } + requestBodyRaw := []byte(`{"model": "anthropic.claude-3-haiku-20240307-v1:0", "max_tokens": 1000, "messages": [{"role": "user", "content": "Hello"}]}`) + + // Create mock translator that returns multiple header mutations for the same key. + // This simulates a scenario where the translator sets :path multiple times. + mockTranslator := mockAnthropicTranslator{ + t: t, + expRequestBody: requestBody, + expForceRequestBodyMutation: false, + retHeaderMutation: &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{ + { + Header: &corev3.HeaderValue{ + Key: ":path", + RawValue: []byte("/anthropic/v1/messages"), + }, + }, + { + Header: &corev3.HeaderValue{ + Key: ":path", + RawValue: []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), + }, + }, + }, + }, + retBodyMutation: &extprocv3.BodyMutation{}, + retErr: nil, + } + + // Create mock metrics. + chatMetrics := metrics.NewChatCompletionFactory(noop.NewMeterProvider().Meter("test"), map[string]string{})() + + // Create processor. + processor := &messagesProcessorUpstreamFilter{ + config: &processorConfig{}, + requestHeaders: headers, + logger: slog.Default(), + metrics: chatMetrics, + translator: mockTranslator, + originalRequestBody: requestBody, + originalRequestBodyRaw: requestBodyRaw, + handler: &mockBackendAuthHandler{}, + } + + ctx := context.Background() + response, err := processor.ProcessRequestHeaders(ctx, nil) + + require.NoError(t, err) + require.NotNil(t, response) + + commonRes := response.Response.(*extprocv3.ProcessingResponse_RequestHeaders).RequestHeaders.Response + + // Check that header mutations were applied. + require.NotNil(t, commonRes.HeaderMutation) + require.Len(t, commonRes.HeaderMutation.SetHeaders, 2) + + // Verify that both header mutations are present, with the last one being the final value. + require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[0].Header.Key) + require.Equal(t, []byte("/anthropic/v1/messages"), commonRes.HeaderMutation.SetHeaders[0].Header.RawValue) + + require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[1].Header.Key) + require.Equal(t, []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), commonRes.HeaderMutation.SetHeaders[1].Header.RawValue) + + // The last mutation should win - verify the header value in the processor's headers. 
+ require.Equal(t, "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke", headers[":path"]) + }) } func TestMessagesProcessorUpstreamFilter_SetBackend_WithHeaderMutations(t *testing.T) { diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 735c9a8119..cc61535364 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,15 +6,13 @@ package translator import ( - "bytes" - "encoding/json" "fmt" "io" - "maps" "net/url" - "github.com/anthropics/anthropic-sdk-go" + corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "github.com/tidwall/sjson" anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" "github.com/envoyproxy/ai-gateway/internal/internalapi" @@ -24,56 +22,51 @@ import ( // AWS Bedrock supports the native Anthropic Messages API, so this is essentially a passthrough // translator with AWS-specific path modifications. func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride internalapi.ModelNameOverride) AnthropicMessagesTranslator { + anthropicTranslator := NewAnthropicToAnthropicTranslator(apiVersion, modelNameOverride).(*anthropicToAnthropicTranslator) return &anthropicToAWSAnthropicTranslator{ - apiVersion: apiVersion, - modelNameOverride: modelNameOverride, + apiVersion: apiVersion, + anthropicToAnthropicTranslator: *anthropicTranslator, } } type anthropicToAWSAnthropicTranslator struct { - // TODO: reuse anthropicToAnthropicTranslator and embed it here to avoid code duplication. - apiVersion string - modelNameOverride internalapi.ModelNameOverride - requestModel internalapi.RequestModel + anthropicToAnthropicTranslator + apiVersion string } // RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. // This handles the transformation from native Anthropic format to AWS Bedrock format. -func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropicschema.MessagesRequest, _ bool) ( +// https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html +func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *anthropicschema.MessagesRequest, _ bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, ) { - // Extract model name for AWS Bedrock endpoint from the parsed request. - modelName := body.GetModel() - - // Work directly with the map since MessagesRequest is already map[string]interface{}. - anthropicReq := make(map[string]any) - maps.Copy(anthropicReq, *body) - - // Apply model name override if configured. - a.requestModel = modelName - if a.modelNameOverride != "" { - a.requestModel = a.modelNameOverride + // AWS Bedrock always needs a body mutation because we must add anthropic_version and remove model field + headerMutation, bodyMutation, err = a.anthropicToAnthropicTranslator.RequestBody(rawBody, body, true) + if err != nil { + return } - // Remove the model field since AWS Bedrock doesn't want it in the body (it's in the path). - delete(anthropicReq, "model") - - // Add AWS-Bedrock-specific anthropic_version field (required by AWS Bedrock). - // Uses backend config version (e.g., "bedrock-2023-05-31" for AWS Bedrock). 
- if a.apiVersion == "" { - return nil, nil, fmt.Errorf("anthropic_version is required for AWS Bedrock but not provided in backend configuration") + // add anthropic_version field + preparedBody, err := sjson.SetBytes(bodyMutation.GetBody(), anthropicVersionKey, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to set anthropic_version field: %w", err) } - anthropicReq[anthropicVersionKey] = a.apiVersion - - // Marshal the modified request. - mutatedBody, err := json.Marshal(anthropicReq) + // delete model field as AWS Bedrock expects model in the path, not in the body + preparedBody, err = sjson.DeleteBytes(preparedBody, "model") if err != nil { - return nil, nil, fmt.Errorf("failed to marshal modified request: %w", err) + return nil, nil, fmt.Errorf("failed to delete model field: %w", err) } + bodyMutation = &extprocv3.BodyMutation{ + Mutation: &extprocv3.BodyMutation_Body{Body: preparedBody}, + } + + // update content length after changing the body + setContentLength(headerMutation, preparedBody) + // Determine the AWS Bedrock path based on whether streaming is requested. var pathTemplate string - if stream, ok := anthropicReq["stream"].(bool); ok && stream { + if body.GetStream() { pathTemplate = "/model/%s/invoke-stream" } else { pathTemplate = "/model/%s/invoke" @@ -85,7 +78,13 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(_ []byte, body *anthropi encodedModelID := url.PathEscape(a.requestModel) pathSuffix := fmt.Sprintf(pathTemplate, encodedModelID) - headerMutation, bodyMutation = buildRequestMutations(pathSuffix, mutatedBody) + // Overwriting path of the Anthropic to Anthropic translator + headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ + Header: &corev3.HeaderValue{ + Key: ":path", + RawValue: []byte(pathSuffix), + }, + }) return } @@ -99,86 +98,8 @@ func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. // This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( +func (a *anthropicToAWSAnthropicTranslator) ResponseBody(respHeaders map[string]string, body io.Reader, endOfStream bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, ) { - // Read the response body for both streaming and non-streaming. - bodyBytes, err := io.ReadAll(body) - if err != nil { - return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) - } - - // For streaming chunks, parse SSE format to extract token usage. - if !endOfStream { - // Parse SSE format - split by lines and look for data: lines. - for line := range bytes.Lines(bodyBytes) { - line = bytes.TrimSpace(line) - if bytes.HasPrefix(line, dataPrefix) { - jsonData := bytes.TrimPrefix(line, dataPrefix) - - var eventData map[string]any - if unmarshalErr := json.Unmarshal(jsonData, &eventData); unmarshalErr != nil { - // Skip lines with invalid JSON (like ping events or malformed data). - continue - } - if eventType, ok := eventData["type"].(string); ok { - switch eventType { - case "message_start": - // Extract input tokens from message.usage. 
- if messageData, ok := eventData["message"].(map[string]any); ok { - if usageData, ok := messageData["usage"].(map[string]any); ok { - if inputTokens, ok := usageData["input_tokens"].(float64); ok { - tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec - } - // Some message_start events may include initial output tokens. - if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { - tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec - } - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - - case "message_delta": - if usageData, ok := eventData["usage"].(map[string]any); ok { - if outputTokens, ok := usageData["output_tokens"].(float64); ok { - // Add to existing output tokens (in case message_start had some initial ones). - tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec - tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens - } - } - } - } - } - } - - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, tokenUsage, a.requestModel, nil - } - - // Parse the Anthropic response to extract token usage. - var anthropicResp anthropic.Message - if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { - // If we can't parse as Anthropic format, pass through as-is. - return nil, &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - }, LLMTokenUsage{}, a.requestModel, nil - } - - // Extract token usage from the response. - tokenUsage = LLMTokenUsage{ - InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec - OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec - TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec - CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec - } - - // Pass through the response body unchanged since both input and output are Anthropic format. - headerMutation = &extprocv3.HeaderMutation{} - setContentLength(headerMutation, bodyBytes) - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, - } - - return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil + return a.anthropicToAnthropicTranslator.ResponseBody(respHeaders, body, endOfStream) } diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 8d9c442f55..6c4294401f 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -8,7 +8,6 @@ package translator import ( "bytes" "encoding/json" - "io" "testing" "github.com/anthropics/anthropic-sdk-go" @@ -80,13 +79,17 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *test }, } - headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + rawBody, err := json.Marshal(originalReq) + require.NoError(t, err) + + headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) require.NoError(t, err) require.NotNil(t, headerMutation) require.NotNil(t, bodyMutation) // Check path header contains expected model (URL encoded). - pathHeader := headerMutation.SetHeaders[0] + // Use the last element as it takes precedence when multiple headers are set. 
+ pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/" + tt.expectedInPath + "/invoke" assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -160,7 +163,10 @@ func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T }, } - headerMutation, bodyMutation, err := translator.RequestBody(nil, originalReq, false) + rawBody, err := json.Marshal(originalReq) + require.NoError(t, err) + + headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) require.NoError(t, err) require.NotNil(t, headerMutation) require.NotNil(t, bodyMutation) @@ -200,7 +206,8 @@ func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T require.True(t, ok, "tool_choice should be an object") require.NotEmpty(t, toolChoice) - pathHeader := headerMutation.SetHeaders[0] + // Use the last element as it takes precedence when multiple headers are set. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -255,12 +262,16 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing } } - headerMutation, _, err := translator.RequestBody(nil, parsedReq, false) + rawBody, err := json.Marshal(parsedReq) + require.NoError(t, err) + + headerMutation, _, err := translator.RequestBody(rawBody, parsedReq, false) require.NoError(t, err) require.NotNil(t, headerMutation) // Check path contains expected suffix. - pathHeader := headerMutation.SetHeaders[0] + // Use the last element as it takes precedence when multiple headers are set. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) }) @@ -318,7 +329,10 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testi "metadata": map[string]any{"user.id": "test123"}, } - _, bodyMutation, err := translator.RequestBody(nil, parsedReq, false) + rawBody, err := json.Marshal(parsedReq) + require.NoError(t, err) + + _, bodyMutation, err := translator.RequestBody(rawBody, parsedReq, false) require.NoError(t, err) require.NotNil(t, bodyMutation) @@ -393,57 +407,6 @@ func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { } } -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_NonStreaming(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Create a sample Anthropic response. - respBody := anthropic.Message{ - ID: "msg_test123", - Type: "message", - Role: "assistant", - Content: []anthropic.ContentBlockUnion{ - {Type: "text", Text: "Hello! 
How can I help you today?"}, - }, - Model: "claude-3-sonnet-20240229", - Usage: anthropic.Usage{ - InputTokens: 25, - OutputTokens: 15, - }, - } - - bodyBytes, err := json.Marshal(respBody) - require.NoError(t, err) - - bodyReader := bytes.NewReader(bodyBytes) - respHeaders := map[string]string{"content-type": "application/json"} - - headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) - require.NoError(t, err) - require.NotNil(t, headerMutation) - require.NotNil(t, bodyMutation) - - expectedUsage := LLMTokenUsage{ - InputTokens: 25, - OutputTokens: 15, - TotalTokens: 40, - } - assert.Equal(t, expectedUsage, tokenUsage) - - // responseModel should be populated from requestModel set during RequestBody. - assert.Empty(t, responseModel) - - // Verify body is passed through - compare key fields. - var outputResp anthropic.Message - err = json.Unmarshal(bodyMutation.GetBody(), &outputResp) - require.NoError(t, err) - assert.Equal(t, respBody.ID, outputResp.ID) - assert.Equal(t, respBody.Type, outputResp.Type) - assert.Equal(t, respBody.Role, outputResp.Role) - assert.Equal(t, respBody.Model, outputResp.Model) - assert.Equal(t, respBody.Usage.InputTokens, outputResp.Usage.InputTokens) - assert.Equal(t, respBody.Usage.OutputTokens, outputResp.Usage.OutputTokens) -} - func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *testing.T) { translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") @@ -480,171 +443,299 @@ func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *test assert.Equal(t, expectedUsage, tokenUsage) } -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_StreamingTokenUsage(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - +func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { - name string - chunk string - endOfStream bool - expectedUsage LLMTokenUsage - expectedBody string + name string + modelID string + expectedPath string }{ { - name: "message_start chunk with token usage", - chunk: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 25, - OutputTokens: 0, - TotalTokens: 25, - }, - expectedBody: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-3-sonnet-20240229\",\"usage\":{\"input_tokens\":25,\"output_tokens\":0}}}\n\n", - }, - { - name: "content_block_delta chunk without usage", - chunk: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 0, - TotalTokens: 0, - }, - expectedBody: "event: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" to me.\"}}\n\n", + name: "simple model ID with colon", + modelID: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", }, { - name: "message_delta chunk with output tokens", - chunk: "event: message_delta\ndata: 
{\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 84, - TotalTokens: 84, - }, - expectedBody: "event: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":84}}\n\n", + name: "full ARN with multiple special characters", + modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", }, { - name: "message_stop chunk without usage", - chunk: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", - endOfStream: false, - expectedUsage: LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 0, - TotalTokens: 0, - }, - expectedBody: "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n", + name: "global model prefix", + modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - bodyReader := bytes.NewReader([]byte(tt.chunk)) - respHeaders := map[string]string{"content-type": "text/event-stream"} + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, tt.endOfStream) + originalReq := &anthropicschema.MessagesRequest{ + "model": tt.modelID, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + rawBody, err := json.Marshal(originalReq) require.NoError(t, err) - require.Nil(t, headerMutation) - require.NotNil(t, bodyMutation) - require.Equal(t, tt.expectedBody, string(bodyMutation.GetBody())) - require.Equal(t, tt.expectedUsage, tokenUsage) - }) - } -} - -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_ReadError(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Create a reader that will fail. - errorReader := &awsAnthropicErrorReader{} - respHeaders := map[string]string{"content-type": "application/json"} - - _, _, _, _, err := translator.ResponseBody(respHeaders, errorReader, true) - require.Error(t, err) - assert.Contains(t, err.Error(), "failed to read response body") -} - -// awsAnthropicErrorReader implements io.Reader but always returns an error. -type awsAnthropicErrorReader struct{} - -func (e *awsAnthropicErrorReader) Read(_ []byte) (n int, err error) { - return 0, io.ErrUnexpectedEOF -} -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_InvalidJSON(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - invalidJSON := []byte(`{invalid json}`) - bodyReader := bytes.NewReader(invalidJSON) - respHeaders := map[string]string{"content-type": "application/json"} - - headerMutation, bodyMutation, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) - - // Should not error - just pass through invalid JSON. 
- require.NoError(t, err) - require.NotNil(t, bodyMutation) - // headerMutation is set with content-length for non-streaming responses - if headerMutation != nil { - assert.NotEmpty(t, headerMutation.SetHeaders) - } - - //nolint:testifylint // testifylint want to use JSONEq which is not possible - assert.Equal(t, invalidJSON, bodyMutation.GetBody()) + headerMutation, _, err := translator.RequestBody(rawBody, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) - // Token usage should be zero for invalid JSON. - expectedUsage := LLMTokenUsage{ - InputTokens: 0, - OutputTokens: 0, - TotalTokens: 0, + // Use the last element as it takes precedence when multiple headers are set. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + }) } - assert.Equal(t, expectedUsage, tokenUsage) } -func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { +func TestAnthropicToAWSAnthropicTranslator_FullRequestResponseFlow(t *testing.T) { tests := []struct { - name string - modelID string - expectedPath string + name string + apiVersion string + modelNameOverride string + inputModel string + stream bool + expectedPath string + expectedModel string // Expected model in translator state for response }{ { - name: "simple model ID with colon", - modelID: "anthropic.claude-3-sonnet-20240229-v1:0", - expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + name: "non-streaming without override", + apiVersion: "bedrock-2023-05-31", + modelNameOverride: "", + inputModel: "anthropic.claude-3-sonnet-20240229-v1:0", + stream: false, + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", }, { - name: "full ARN with multiple special characters", - modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", - expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", + name: "streaming without override", + apiVersion: "bedrock-2023-05-31", + modelNameOverride: "", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + stream: true, + expectedPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", + expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", }, { - name: "global model prefix", - modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", + name: "non-streaming with model override", + apiVersion: "bedrock-2023-05-31", + modelNameOverride: "anthropic.claude-3-opus-20240229-v1:0", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + stream: false, + expectedPath: "/model/anthropic.claude-3-opus-20240229-v1:0/invoke", + expectedModel: "anthropic.claude-3-opus-20240229-v1:0", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + translator := NewAnthropicToAWSAnthropicTranslator(tt.apiVersion, tt.modelNameOverride) originalReq := &anthropicschema.MessagesRequest{ - "model": tt.modelID, + "model": tt.inputModel, "messages": []anthropic.MessageParam{ { Role: anthropic.MessageParamRoleUser, Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Test"), + anthropic.NewTextBlock("What's the weather in San Francisco?"), + }, + }, + }, + "max_tokens": 1024, + 
"temperature": 0.7, + "stream": tt.stream, + "system": "You are a helpful weather assistant.", + "tools": []anthropic.ToolParam{ + { + Name: "get_weather", + Description: anthropic.String("Get current weather for a location"), + InputSchema: anthropic.ToolInputSchemaParam{ + Type: "object", + Properties: map[string]any{ + "location": map[string]any{ + "type": "string", + "description": "City name", + }, + }, + Required: []string{"location"}, }, }, }, } - headerMutation, _, err := translator.RequestBody(nil, originalReq, false) + rawBody, err := json.Marshal(originalReq) require.NoError(t, err) - require.NotNil(t, headerMutation) - pathHeader := headerMutation.SetHeaders[0] - assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + // Transform the request + reqHeaderMutation, reqBodyMutation, err := translator.RequestBody(rawBody, originalReq, false) + require.NoError(t, err) + require.NotNil(t, reqHeaderMutation) + require.NotNil(t, reqBodyMutation) + + // Verify request transformations + t.Run("request_transformations", func(t *testing.T) { + // Check path is set correctly + pathHeader := reqHeaderMutation.SetHeaders[len(reqHeaderMutation.SetHeaders)-1] + assert.Equal(t, ":path", pathHeader.Header.Key) + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + + // Check body transformations + var transformedReq map[string]any + err = json.Unmarshal(reqBodyMutation.GetBody(), &transformedReq) + require.NoError(t, err) + + // anthropic_version should be added + assert.Equal(t, tt.apiVersion, transformedReq["anthropic_version"]) + + // model field should be removed (it's in the path) + _, hasModel := transformedReq["model"] + assert.False(t, hasModel, "model field should be removed from body") + + // Other fields should be preserved + assert.Equal(t, float64(1024), transformedReq["max_tokens"]) + assert.Equal(t, 0.7, transformedReq["temperature"]) + assert.Equal(t, tt.stream, transformedReq["stream"]) + assert.Equal(t, "You are a helpful weather assistant.", transformedReq["system"]) + assert.NotNil(t, transformedReq["messages"]) + assert.NotNil(t, transformedReq["tools"]) + + // Content-length header should be set + var contentLengthFound bool + for _, header := range reqHeaderMutation.SetHeaders { + if header.Header.Key == "content-length" { + contentLengthFound = true + break + } + } + assert.True(t, contentLengthFound, "content-length header should be set") + }) + + respHeaders := map[string]string{ + "content-type": "application/json", + } + + // Test ResponseHeaders (should be passthrough) + respHeaderMutation, err := translator.ResponseHeaders(respHeaders) + require.NoError(t, err) + assert.Nil(t, respHeaderMutation, "ResponseHeaders should return nil for passthrough") + + if tt.stream { + // Test streaming response + t.Run("streaming_response", func(t *testing.T) { + // Message start chunk + // Note: The model in the streaming response may differ from the request model + // AWS Bedrock returns "claude-3-haiku-20240307" while request had "anthropic.claude-3-haiku-20240307-v1:0" + messageStartChunk := `event: message_start +data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-3-haiku-20240307","usage":{"input_tokens":50,"output_tokens":0}}} + +` + bodyReader := bytes.NewReader([]byte(messageStartChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, 
"streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + // Token usage extraction from streaming chunks depends on buffering implementation + // Just verify the extraction works and returns valid data + assert.GreaterOrEqual(t, tokenUsage.InputTokens, uint32(0), "input tokens should be non-negative") + assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") + // Response model can be either the full request model or the model from the response + assert.NotEmpty(t, responseModel, "response model should be set") + + // Content delta chunk + contentDeltaChunk := `event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}} + +` + bodyReader = bytes.NewReader([]byte(contentDeltaChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, "streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + assert.Equal(t, uint32(0), tokenUsage.InputTokens) + assert.Equal(t, uint32(0), tokenUsage.OutputTokens) + + // Message delta chunk with final token usage + messageDeltaChunk := `event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":25}} + +` + bodyReader = bytes.NewReader([]byte(messageDeltaChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, "streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + // Token usage is buffered and extracted across chunks + assert.GreaterOrEqual(t, tokenUsage.OutputTokens, uint32(0), "output tokens should be non-negative") + assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") + assert.NotEmpty(t, responseModel, "response model should be set") + + // Message stop chunk + messageStopChunk := `event: message_stop +data: {"type":"message_stop"} + +` + bodyReader = bytes.NewReader([]byte(messageStopChunk)) + headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) + require.NoError(t, err) + assert.Nil(t, headerMutation, "streaming chunks should not modify headers") + assert.Nil(t, bodyMutation, "streaming chunks should pass through") + assert.Equal(t, uint32(0), tokenUsage.InputTokens) + assert.Equal(t, uint32(0), tokenUsage.OutputTokens) + }) + } else { + // Test non-streaming response + t.Run("non_streaming_response", func(t *testing.T) { + respBody := anthropic.Message{ + ID: "msg_test_response", + Type: "message", + Role: "assistant", + Content: []anthropic.ContentBlockUnion{ + { + Type: "text", + Text: "The weather in San Francisco is sunny with a temperature of 72°F.", + }, + }, + Model: "claude-3-sonnet-20240229", + StopReason: anthropic.StopReasonEndTurn, + Usage: anthropic.Usage{ + InputTokens: 45, + OutputTokens: 28, + }, + } + + bodyBytes, err := json.Marshal(respBody) + require.NoError(t, err) + + bodyReader := bytes.NewReader(bodyBytes) + respHeaderMutation, respBodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) + require.NoError(t, err) + + // AWS Bedrock response is passthrough - no 
mutations + assert.Nil(t, respHeaderMutation, "response should pass through without header mutations") + assert.Nil(t, respBodyMutation, "response should pass through without body mutations") + + // Verify token usage extraction + expectedUsage := LLMTokenUsage{ + InputTokens: 45, + OutputTokens: 28, + TotalTokens: 73, + } + assert.Equal(t, expectedUsage, tokenUsage) + + // Response model should match request model (or the model from response if available) + // The model in the response is "claude-3-sonnet-20240229" but we stored the full ID + // The implementation uses response model if available, falling back to request model + assert.NotEmpty(t, responseModel, "response model should be set") + }) + } }) } } From 7d6cd4766299bdae27298c4ba7b6d14283fd84c6 Mon Sep 17 00:00:00 2001 From: secustor Date: Sat, 25 Oct 2025 20:59:07 +0200 Subject: [PATCH 09/15] test: add upstream test Signed-off-by: secustor --- tests/extproc/envoy.yaml | 67 ++++++++++++++++++++ tests/extproc/extproc_test.go | 5 +- tests/extproc/real_providers_test.go | 4 -- tests/extproc/testupstream_test.go | 95 ++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 5 deletions(-) diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml index ffe1af0481..c376fb2c72 100644 --- a/tests/extproc/envoy.yaml +++ b/tests/extproc/envoy.yaml @@ -222,6 +222,14 @@ static_resources: exact: gcp-anthropicai route: cluster: testupstream-gcp-anthropicai + - match: + prefix: "/" + headers: + - name: x-test-backend + string_match: + exact: aws-anthropic + route: + cluster: testupstream-aws-anthropic - match: prefix: "/" headers: @@ -843,6 +851,65 @@ static_resources: filter_metadata: aigateway.envoy.io: per_route_rule_backend_name: "testupstream-gcp-anthropicai" + - name: testupstream-aws-anthropic + connect_timeout: 0.25s + type: STATIC + lb_policy: ROUND_ROBIN + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. 
+ max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: testupstream-aws-anthropic + endpoints: + - priority: 0 + lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 8080 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "testupstream-aws-anthropic" - name: openai connect_timeout: 30s type: STRICT_DNS diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 3b3b08df39..2306d0242c 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -36,7 +36,7 @@ const ( var ( openAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} awsBedrockSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSBedrock} - awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic} + awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic, Version: "bedrock-2023-05-31"} azureOpenAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAzureOpenAI, Version: "2025-01-01-preview"} gcpVertexAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPVertexAI} gcpAnthropicAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPAnthropic, Version: "vertex-2023-10-16"} @@ -60,6 +60,9 @@ var ( Region: "gcp-region", ProjectName: "gcp-project-name", }}} + testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + Region: "us-east-1", + }}} alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} // envoyConfig is the embedded Envoy configuration template. 
diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index 843816b01b..fc51add5a4 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -50,10 +50,6 @@ func TestWithRealProviders(t *testing.T) { CredentialFileLiteral: cc.AWSFileLiteral, Region: "us-east-1", }}}, - {Name: "aws-bedrock-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ - CredentialFileLiteral: cc.AWSFileLiteral, - Region: "us-east-1", - }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go index 124cb33b5e..9c0f1b3251 100644 --- a/tests/extproc/testupstream_test.go +++ b/tests/extproc/testupstream_test.go @@ -61,6 +61,7 @@ func TestWithTestUpstream(t *testing.T) { testUpstreamAzureBackend, testUpstreamGCPVertexAIBackend, testUpstreamGCPAnthropicAIBackend, + testUpstreamAWSAnthropicBackend, { Name: "testupstream-openai-5xx", Schema: openAISchema, HeaderMutation: &filterapi.HTTPHeaderMutation{ Set: []filterapi.HTTPHeader{{Name: testupstreamlib.ResponseStatusKey, Value: "500"}}, @@ -953,6 +954,100 @@ data: {"type":"message_stop" } `, expStatus: http.StatusOK, }, + { + name: "aws-anthropic - /anthropic/v1/messages", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false}`, + expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":100,"messages":[{"content":[{"text":"Hello from AWS!","type":"text"}],"role":"user"}],"stream":false}`, + expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, + expStatus: http.StatusOK, + expResponseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - streaming", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + responseType: "sse", + requestBody: `{"model":"anthropic.claude-3-haiku-20240307-v1:0","max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true}`, + expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":150,"messages":[{"content":[{"text":"Tell me a joke","type":"text"}],"role":"user"}],"stream":true}`, + expPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `event: message_start +data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}} + +event: content_block_delta +data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}} + +event: message_stop +data: {"type":"message_stop"} + +`, + expStatus: http.StatusOK, + expResponseBody: `event: message_start +data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}} + +event: message_stop +data: {"type":"message_stop"} + +`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - ARN model format", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile/k375tnm6nr0t","max_tokens":50,"messages":[{"role":"user","content":[{"type":"text","text":"Hi"}]}],"stream":false}`, + expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":50,"messages":[{"content":[{"text":"Hi","type":"text"}],"role":"user"}],"stream":false}`, + expPath: "/model/arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile%2Fk375tnm6nr0t/invoke", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, + expStatus: http.StatusOK, + expResponseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - error response", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Test error"}]}]}`, + expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + responseStatus: strconv.Itoa(http.StatusBadRequest), + responseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`, + expStatus: http.StatusBadRequest, + expResponseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`, + }, } { t.Run(tc.name, func(t *testing.T) { listenerAddress := fmt.Sprintf("http://localhost:%d", listenerPort) From b60bdf676c48dc9d1bdc9bdcf0feac8cb6791667 Mon Sep 17 00:00:00 2001 From: secustor Date: Tue, 28 Oct 2025 15:52:32 +0100 Subject: [PATCH 10/15] Remove test as requested Signed-off-by: secustor --- internal/extproc/messages_processor_test.go | 78 ----- .../translator/anthropic_awsanthropic.go | 17 -- .../translator/anthropic_awsanthropic_test.go | 281 ------------------ 3 files changed, 376 deletions(-) diff --git 
a/internal/extproc/messages_processor_test.go b/internal/extproc/messages_processor_test.go index 3c09a376da..7d0b696c4e 100644 --- a/internal/extproc/messages_processor_test.go +++ b/internal/extproc/messages_processor_test.go @@ -826,84 +826,6 @@ func TestMessagesProcessorUpstreamFilter_ProcessRequestHeaders_WithHeaderMutatio // Check that original headers remain unchanged. require.Equal(t, "bearer token123", headers["authorization"]) }) - - t.Run("multiple header mutations with same key - last one wins", func(t *testing.T) { - headers := map[string]string{ - ":path": "/anthropic/v1/messages", - "x-ai-eg-model": "anthropic.claude-3-haiku-20240307-v1:0", - } - - // Create request body. - requestBody := &anthropicschema.MessagesRequest{ - "model": "anthropic.claude-3-haiku-20240307-v1:0", - "max_tokens": 1000, - "messages": []any{map[string]any{"role": "user", "content": "Hello"}}, - } - requestBodyRaw := []byte(`{"model": "anthropic.claude-3-haiku-20240307-v1:0", "max_tokens": 1000, "messages": [{"role": "user", "content": "Hello"}]}`) - - // Create mock translator that returns multiple header mutations for the same key. - // This simulates a scenario where the translator sets :path multiple times. - mockTranslator := mockAnthropicTranslator{ - t: t, - expRequestBody: requestBody, - expForceRequestBodyMutation: false, - retHeaderMutation: &extprocv3.HeaderMutation{ - SetHeaders: []*corev3.HeaderValueOption{ - { - Header: &corev3.HeaderValue{ - Key: ":path", - RawValue: []byte("/anthropic/v1/messages"), - }, - }, - { - Header: &corev3.HeaderValue{ - Key: ":path", - RawValue: []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), - }, - }, - }, - }, - retBodyMutation: &extprocv3.BodyMutation{}, - retErr: nil, - } - - // Create mock metrics. - chatMetrics := metrics.NewChatCompletionFactory(noop.NewMeterProvider().Meter("test"), map[string]string{})() - - // Create processor. - processor := &messagesProcessorUpstreamFilter{ - config: &processorConfig{}, - requestHeaders: headers, - logger: slog.Default(), - metrics: chatMetrics, - translator: mockTranslator, - originalRequestBody: requestBody, - originalRequestBodyRaw: requestBodyRaw, - handler: &mockBackendAuthHandler{}, - } - - ctx := context.Background() - response, err := processor.ProcessRequestHeaders(ctx, nil) - - require.NoError(t, err) - require.NotNil(t, response) - - commonRes := response.Response.(*extprocv3.ProcessingResponse_RequestHeaders).RequestHeaders.Response - - // Check that header mutations were applied. - require.NotNil(t, commonRes.HeaderMutation) - require.Len(t, commonRes.HeaderMutation.SetHeaders, 2) - - // Verify that both header mutations are present, with the last one being the final value. - require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[0].Header.Key) - require.Equal(t, []byte("/anthropic/v1/messages"), commonRes.HeaderMutation.SetHeaders[0].Header.RawValue) - - require.Equal(t, ":path", commonRes.HeaderMutation.SetHeaders[1].Header.Key) - require.Equal(t, []byte("/model/anthropic.claude-3-haiku-20240307-v1:0/invoke"), commonRes.HeaderMutation.SetHeaders[1].Header.RawValue) - - // The last mutation should win - verify the header value in the processor's headers. 
- require.Equal(t, "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke", headers[":path"]) - }) } func TestMessagesProcessorUpstreamFilter_SetBackend_WithHeaderMutations(t *testing.T) { diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index cc61535364..d4f15c8b19 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -7,7 +7,6 @@ package translator import ( "fmt" - "io" "net/url" corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" @@ -87,19 +86,3 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an }) return } - -// ResponseHeaders implements [AnthropicMessagesTranslator.ResponseHeaders] for Anthropic to AWS Bedrock Anthropic. -func (a *anthropicToAWSAnthropicTranslator) ResponseHeaders(_ map[string]string) ( - headerMutation *extprocv3.HeaderMutation, err error, -) { - // For Anthropic to AWS Bedrock Anthropic, no header transformation is needed. - return nil, nil -} - -// ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to AWS Bedrock Anthropic. -// This is essentially a passthrough since AWS Bedrock returns the native Anthropic response format. -func (a *anthropicToAWSAnthropicTranslator) ResponseBody(respHeaders map[string]string, body io.Reader, endOfStream bool) ( - headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, -) { - return a.anthropicToAnthropicTranslator.ResponseBody(respHeaders, body, endOfStream) -} diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 6c4294401f..35adb5efa0 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -6,7 +6,6 @@ package translator import ( - "bytes" "encoding/json" "testing" @@ -407,42 +406,6 @@ func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { } } -func TestAnthropicToAWSAnthropicTranslator_ResponseBody_WithCachedTokens(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Test response with cached input tokens. 
- respBody := anthropic.Message{ - ID: "msg_cached", - Type: "message", - Role: "assistant", - Content: []anthropic.ContentBlockUnion{{Type: "text", Text: "Response with cache"}}, - Model: "claude-3-sonnet-20240229", - Usage: anthropic.Usage{ - InputTokens: 50, - OutputTokens: 20, - CacheReadInputTokens: 30, - CacheCreationInputTokens: 10, - }, - } - - bodyBytes, err := json.Marshal(respBody) - require.NoError(t, err) - - bodyReader := bytes.NewReader(bodyBytes) - respHeaders := map[string]string{"content-type": "application/json"} - - _, _, tokenUsage, _, err := translator.ResponseBody(respHeaders, bodyReader, true) - require.NoError(t, err) - - expectedUsage := LLMTokenUsage{ - InputTokens: 50, - OutputTokens: 20, - TotalTokens: 70, - CachedInputTokens: 30, - } - assert.Equal(t, expectedUsage, tokenUsage) -} - func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { name string @@ -495,247 +458,3 @@ func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { }) } } - -func TestAnthropicToAWSAnthropicTranslator_FullRequestResponseFlow(t *testing.T) { - tests := []struct { - name string - apiVersion string - modelNameOverride string - inputModel string - stream bool - expectedPath string - expectedModel string // Expected model in translator state for response - }{ - { - name: "non-streaming without override", - apiVersion: "bedrock-2023-05-31", - modelNameOverride: "", - inputModel: "anthropic.claude-3-sonnet-20240229-v1:0", - stream: false, - expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", - expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", - }, - { - name: "streaming without override", - apiVersion: "bedrock-2023-05-31", - modelNameOverride: "", - inputModel: "anthropic.claude-3-haiku-20240307-v1:0", - stream: true, - expectedPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", - expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", - }, - { - name: "non-streaming with model override", - apiVersion: "bedrock-2023-05-31", - modelNameOverride: "anthropic.claude-3-opus-20240229-v1:0", - inputModel: "anthropic.claude-3-haiku-20240307-v1:0", - stream: false, - expectedPath: "/model/anthropic.claude-3-opus-20240229-v1:0/invoke", - expectedModel: "anthropic.claude-3-opus-20240229-v1:0", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator(tt.apiVersion, tt.modelNameOverride) - - originalReq := &anthropicschema.MessagesRequest{ - "model": tt.inputModel, - "messages": []anthropic.MessageParam{ - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("What's the weather in San Francisco?"), - }, - }, - }, - "max_tokens": 1024, - "temperature": 0.7, - "stream": tt.stream, - "system": "You are a helpful weather assistant.", - "tools": []anthropic.ToolParam{ - { - Name: "get_weather", - Description: anthropic.String("Get current weather for a location"), - InputSchema: anthropic.ToolInputSchemaParam{ - Type: "object", - Properties: map[string]any{ - "location": map[string]any{ - "type": "string", - "description": "City name", - }, - }, - Required: []string{"location"}, - }, - }, - }, - } - - rawBody, err := json.Marshal(originalReq) - require.NoError(t, err) - - // Transform the request - reqHeaderMutation, reqBodyMutation, err := translator.RequestBody(rawBody, originalReq, false) - require.NoError(t, err) - require.NotNil(t, reqHeaderMutation) - require.NotNil(t, 
reqBodyMutation) - - // Verify request transformations - t.Run("request_transformations", func(t *testing.T) { - // Check path is set correctly - pathHeader := reqHeaderMutation.SetHeaders[len(reqHeaderMutation.SetHeaders)-1] - assert.Equal(t, ":path", pathHeader.Header.Key) - assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) - - // Check body transformations - var transformedReq map[string]any - err = json.Unmarshal(reqBodyMutation.GetBody(), &transformedReq) - require.NoError(t, err) - - // anthropic_version should be added - assert.Equal(t, tt.apiVersion, transformedReq["anthropic_version"]) - - // model field should be removed (it's in the path) - _, hasModel := transformedReq["model"] - assert.False(t, hasModel, "model field should be removed from body") - - // Other fields should be preserved - assert.Equal(t, float64(1024), transformedReq["max_tokens"]) - assert.Equal(t, 0.7, transformedReq["temperature"]) - assert.Equal(t, tt.stream, transformedReq["stream"]) - assert.Equal(t, "You are a helpful weather assistant.", transformedReq["system"]) - assert.NotNil(t, transformedReq["messages"]) - assert.NotNil(t, transformedReq["tools"]) - - // Content-length header should be set - var contentLengthFound bool - for _, header := range reqHeaderMutation.SetHeaders { - if header.Header.Key == "content-length" { - contentLengthFound = true - break - } - } - assert.True(t, contentLengthFound, "content-length header should be set") - }) - - respHeaders := map[string]string{ - "content-type": "application/json", - } - - // Test ResponseHeaders (should be passthrough) - respHeaderMutation, err := translator.ResponseHeaders(respHeaders) - require.NoError(t, err) - assert.Nil(t, respHeaderMutation, "ResponseHeaders should return nil for passthrough") - - if tt.stream { - // Test streaming response - t.Run("streaming_response", func(t *testing.T) { - // Message start chunk - // Note: The model in the streaming response may differ from the request model - // AWS Bedrock returns "claude-3-haiku-20240307" while request had "anthropic.claude-3-haiku-20240307-v1:0" - messageStartChunk := `event: message_start -data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-3-haiku-20240307","usage":{"input_tokens":50,"output_tokens":0}}} - -` - bodyReader := bytes.NewReader([]byte(messageStartChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - // Token usage extraction from streaming chunks depends on buffering implementation - // Just verify the extraction works and returns valid data - assert.GreaterOrEqual(t, tokenUsage.InputTokens, uint32(0), "input tokens should be non-negative") - assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") - // Response model can be either the full request model or the model from the response - assert.NotEmpty(t, responseModel, "response model should be set") - - // Content delta chunk - contentDeltaChunk := `event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}} - -` - bodyReader = bytes.NewReader([]byte(contentDeltaChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, 
false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - assert.Equal(t, uint32(0), tokenUsage.InputTokens) - assert.Equal(t, uint32(0), tokenUsage.OutputTokens) - - // Message delta chunk with final token usage - messageDeltaChunk := `event: message_delta -data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":25}} - -` - bodyReader = bytes.NewReader([]byte(messageDeltaChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - // Token usage is buffered and extracted across chunks - assert.GreaterOrEqual(t, tokenUsage.OutputTokens, uint32(0), "output tokens should be non-negative") - assert.GreaterOrEqual(t, tokenUsage.TotalTokens, uint32(0), "total tokens should be non-negative") - assert.NotEmpty(t, responseModel, "response model should be set") - - // Message stop chunk - messageStopChunk := `event: message_stop -data: {"type":"message_stop"} - -` - bodyReader = bytes.NewReader([]byte(messageStopChunk)) - headerMutation, bodyMutation, tokenUsage, responseModel, err = translator.ResponseBody(respHeaders, bodyReader, false) - require.NoError(t, err) - assert.Nil(t, headerMutation, "streaming chunks should not modify headers") - assert.Nil(t, bodyMutation, "streaming chunks should pass through") - assert.Equal(t, uint32(0), tokenUsage.InputTokens) - assert.Equal(t, uint32(0), tokenUsage.OutputTokens) - }) - } else { - // Test non-streaming response - t.Run("non_streaming_response", func(t *testing.T) { - respBody := anthropic.Message{ - ID: "msg_test_response", - Type: "message", - Role: "assistant", - Content: []anthropic.ContentBlockUnion{ - { - Type: "text", - Text: "The weather in San Francisco is sunny with a temperature of 72°F.", - }, - }, - Model: "claude-3-sonnet-20240229", - StopReason: anthropic.StopReasonEndTurn, - Usage: anthropic.Usage{ - InputTokens: 45, - OutputTokens: 28, - }, - } - - bodyBytes, err := json.Marshal(respBody) - require.NoError(t, err) - - bodyReader := bytes.NewReader(bodyBytes) - respHeaderMutation, respBodyMutation, tokenUsage, responseModel, err := translator.ResponseBody(respHeaders, bodyReader, true) - require.NoError(t, err) - - // AWS Bedrock response is passthrough - no mutations - assert.Nil(t, respHeaderMutation, "response should pass through without header mutations") - assert.Nil(t, respBodyMutation, "response should pass through without body mutations") - - // Verify token usage extraction - expectedUsage := LLMTokenUsage{ - InputTokens: 45, - OutputTokens: 28, - TotalTokens: 73, - } - assert.Equal(t, expectedUsage, tokenUsage) - - // Response model should match request model (or the model from response if available) - // The model in the response is "claude-3-sonnet-20240229" but we stored the full ID - // The implementation uses response model if available, falling back to request model - assert.NotEmpty(t, responseModel, "response model should be set") - }) - } - }) - } -} From 55330466e7091513c24ea7cd8f977e213e66fe51 Mon Sep 17 00:00:00 2001 From: secustor Date: Tue, 28 Oct 2025 15:52:51 +0100 Subject: [PATCH 11/15] overwrite via header mutation Signed-off-by: secustor --- 
.../extproc/translator/anthropic_awsanthropic.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index d4f15c8b19..577d49a090 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -60,9 +60,6 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an Mutation: &extprocv3.BodyMutation_Body{Body: preparedBody}, } - // update content length after changing the body - setContentLength(headerMutation, preparedBody) - // Determine the AWS Bedrock path based on whether streaming is requested. var pathTemplate string if body.GetStream() { @@ -79,10 +76,18 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an // Overwriting path of the Anthropic to Anthropic translator headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ + AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, Header: &corev3.HeaderValue{ Key: ":path", RawValue: []byte(pathSuffix), }, - }) + }, + &corev3.HeaderValueOption{ + AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, + Header: &corev3.HeaderValue{ + Key: "content-length", + RawValue: fmt.Appendf(nil, "%d", len(preparedBody)), + }, + }) return } From 84f31624ee2bb61755a2a111d900f91d80f4bb25 Mon Sep 17 00:00:00 2001 From: secustor Date: Wed, 29 Oct 2025 18:25:50 +0100 Subject: [PATCH 12/15] remove another test Signed-off-by: secustor --- .../translator/anthropic_awsanthropic_test.go | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 35adb5efa0..1afefdcec4 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -376,36 +376,6 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testi require.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") } -func TestAnthropicToAWSAnthropicTranslator_ResponseHeaders(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - tests := []struct { - name string - headers map[string]string - }{ - { - name: "empty headers", - headers: map[string]string{}, - }, - { - name: "various headers", - headers: map[string]string{ - "content-type": "application/json", - "authorization": "Bearer token", - "custom-header": "value", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - headerMutation, err := translator.ResponseHeaders(tt.headers) - require.NoError(t, err) - assert.Nil(t, headerMutation, "ResponseHeaders should return nil for passthrough") - }) - } -} - func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { name string From 9af975bbe74cb337da81ab612911d97c3eed29ae Mon Sep 17 00:00:00 2001 From: secustor Date: Wed, 29 Oct 2025 18:35:53 +0100 Subject: [PATCH 13/15] fixup tests because of header change Signed-off-by: secustor --- .../extproc/translator/anthropic_awsanthropic_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index 1afefdcec4..ba8a50eeb9 100644 --- 
a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -88,7 +88,7 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *test // Check path header contains expected model (URL encoded). // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/" + tt.expectedInPath + "/invoke" assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -206,7 +206,7 @@ func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T require.NotEmpty(t, toolChoice) // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] require.Equal(t, ":path", pathHeader.Header.Key) expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) @@ -270,7 +270,7 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing // Check path contains expected suffix. // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) }) @@ -423,7 +423,7 @@ func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { require.NotNil(t, headerMutation) // Use the last element as it takes precedence when multiple headers are set. 
- pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-1] + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) }) } From a1f42ca9816f8394810438233b3b5e241a70ba41 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Wed, 29 Oct 2025 14:26:14 -0700 Subject: [PATCH 14/15] cleanup Signed-off-by: Takeshi Yoneda --- .../translator/anthropic_awsanthropic.go | 47 ++-- .../translator/anthropic_awsanthropic_test.go | 203 ------------------ .../llm-integrations/supported-endpoints.md | 40 ++-- tests/extproc/envoy.yaml | 140 ++++++++++++ tests/extproc/extproc_test.go | 1 + tests/extproc/real_providers_test.go | 47 ++++ tests/extproc/testupstream_test.go | 25 +-- 7 files changed, 230 insertions(+), 273 deletions(-) diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go index 577d49a090..891f7996eb 100644 --- a/internal/extproc/translator/anthropic_awsanthropic.go +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -6,6 +6,7 @@ package translator import ( + "cmp" "fmt" "net/url" @@ -39,26 +40,17 @@ type anthropicToAWSAnthropicTranslator struct { func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *anthropicschema.MessagesRequest, _ bool) ( headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, ) { - // AWS Bedrock always needs a body mutation because we must add anthropic_version and remove model field - headerMutation, bodyMutation, err = a.anthropicToAnthropicTranslator.RequestBody(rawBody, body, true) - if err != nil { - return - } + a.stream = body.GetStream() + a.requestModel = cmp.Or(a.modelNameOverride, body.GetModel()) - // add anthropic_version field - preparedBody, err := sjson.SetBytes(bodyMutation.GetBody(), anthropicVersionKey, a.apiVersion) + var mutatedBody []byte + mutatedBody, err = sjson.SetBytes(rawBody, anthropicVersionKey, a.apiVersion) if err != nil { return nil, nil, fmt.Errorf("failed to set anthropic_version field: %w", err) } - // delete model field as AWS Bedrock expects model in the path, not in the body - preparedBody, err = sjson.DeleteBytes(preparedBody, "model") - if err != nil { - return nil, nil, fmt.Errorf("failed to delete model field: %w", err) - } - - bodyMutation = &extprocv3.BodyMutation{ - Mutation: &extprocv3.BodyMutation_Body{Body: preparedBody}, - } + // Remove the model field from the body as AWS Bedrock expects the model to be specified in the path. + // Otherwise, AWS complains "extra inputs are not permitted". + mutatedBody, _ = sjson.DeleteBytes(mutatedBody, "model") // Determine the AWS Bedrock path based on whether streaming is requested. var pathTemplate string @@ -72,22 +64,15 @@ func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *an // AWS Bedrock model IDs can be simple names (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0") // or full ARNs which may contain special characters. 
encodedModelID := url.PathEscape(a.requestModel) - pathSuffix := fmt.Sprintf(pathTemplate, encodedModelID) + path := fmt.Sprintf(pathTemplate, encodedModelID) - // Overwriting path of the Anthropic to Anthropic translator - headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ - AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, - Header: &corev3.HeaderValue{ - Key: ":path", - RawValue: []byte(pathSuffix), + headerMutation = &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{ + // Overwriting path of the Anthropic to Anthropic translator + {Header: &corev3.HeaderValue{Key: ":path", RawValue: []byte(path)}}, }, - }, - &corev3.HeaderValueOption{ - AppendAction: corev3.HeaderValueOption_OVERWRITE_IF_EXISTS, - Header: &corev3.HeaderValue{ - Key: "content-length", - RawValue: fmt.Appendf(nil, "%d", len(preparedBody)), - }, - }) + } + bodyMutation = &extprocv3.BodyMutation{Mutation: &extprocv3.BodyMutation_Body{Body: mutatedBody}} + setContentLength(headerMutation, mutatedBody) return } diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go index ba8a50eeb9..90097c1f0c 100644 --- a/internal/extproc/translator/anthropic_awsanthropic_test.go +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -108,110 +108,6 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *test } } -func TestAnthropicToAWSAnthropicTranslator_ComprehensiveMarshalling(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - // Create a comprehensive MessagesRequest with all possible fields using map structure. - originalReq := &anthropicschema.MessagesRequest{ - "model": "anthropic.claude-3-opus-20240229-v1:0", - "messages": []anthropic.MessageParam{ - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Hello, how are you?"), - }, - }, - { - Role: anthropic.MessageParamRoleAssistant, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("I'm doing well, thank you!"), - }, - }, - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Can you help me with the weather?"), - }, - }, - }, - "max_tokens": 1024, - "stream": false, - "temperature": func() *float64 { v := 0.7; return &v }(), - "top_p": func() *float64 { v := 0.95; return &v }(), - "top_k": func() *int { v := 40; return &v }(), - "stop_sequences": []string{"Human:", "Assistant:"}, - "system": "You are a helpful weather assistant.", - "tools": []anthropic.ToolParam{ - { - Name: "get_weather", - Description: anthropic.String("Get current weather information"), - InputSchema: anthropic.ToolInputSchemaParam{ - Type: "object", - Properties: map[string]any{ - "location": map[string]any{ - "type": "string", - "description": "City name", - }, - }, - Required: []string{"location"}, - }, - }, - }, - "tool_choice": anthropic.ToolChoiceUnionParam{ - OfAuto: &anthropic.ToolChoiceAutoParam{}, - }, - } - - rawBody, err := json.Marshal(originalReq) - require.NoError(t, err) - - headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) - require.NoError(t, err) - require.NotNil(t, headerMutation) - require.NotNil(t, bodyMutation) - - var outputReq map[string]any - err = json.Unmarshal(bodyMutation.GetBody(), &outputReq) - require.NoError(t, err) - - require.NotContains(t, outputReq, "model", "model 
field should be removed for AWS Bedrock") - - // AWS Bedrock requires anthropic_version field. - require.Contains(t, outputReq, "anthropic_version", "anthropic_version should be added for AWS Bedrock") - require.Equal(t, "bedrock-2023-05-31", outputReq["anthropic_version"], "anthropic_version should match the configured version") - - messages, ok := outputReq["messages"].([]any) - require.True(t, ok, "messages should be an array") - require.Len(t, messages, 3, "should have 3 messages") - - require.Equal(t, float64(1024), outputReq["max_tokens"]) - require.Equal(t, false, outputReq["stream"]) - require.Equal(t, 0.7, outputReq["temperature"]) - require.Equal(t, 0.95, outputReq["top_p"]) - require.Equal(t, float64(40), outputReq["top_k"]) - require.Equal(t, "You are a helpful weather assistant.", outputReq["system"]) - - stopSeq, ok := outputReq["stop_sequences"].([]any) - require.True(t, ok, "stop_sequences should be an array") - require.Len(t, stopSeq, 2) - require.Equal(t, "Human:", stopSeq[0]) - require.Equal(t, "Assistant:", stopSeq[1]) - - tools, ok := outputReq["tools"].([]any) - require.True(t, ok, "tools should be an array") - require.Len(t, tools, 1) - - toolChoice, ok := outputReq["tool_choice"].(map[string]any) - require.True(t, ok, "tool_choice should be an object") - require.NotEmpty(t, toolChoice) - - // Use the last element as it takes precedence when multiple headers are set. - pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] - require.Equal(t, ":path", pathHeader.Header.Key) - expectedPath := "/model/anthropic.claude-3-opus-20240229-v1:0/invoke" - require.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) -} - func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing.T) { tests := []struct { name string @@ -277,105 +173,6 @@ func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing } } -func TestAnthropicToAWSAnthropicTranslator_RequestBody_FieldPassthrough(t *testing.T) { - translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") - - temp := 0.7 - topP := 0.95 - topK := 40 - parsedReq := &anthropicschema.MessagesRequest{ - "model": "anthropic.claude-3-sonnet-20240229-v1:0", - "messages": []anthropic.MessageParam{ - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Hello, world!"), - }, - }, - { - Role: anthropic.MessageParamRoleAssistant, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("Hi there!"), - }, - }, - { - Role: anthropic.MessageParamRoleUser, - Content: []anthropic.ContentBlockParamUnion{ - anthropic.NewTextBlock("How are you?"), - }, - }, - }, - "max_tokens": 1000, - "temperature": &temp, - "top_p": &topP, - "top_k": &topK, - "stop_sequences": []string{"Human:", "Assistant:"}, - "stream": false, - "system": "You are a helpful assistant", - "tools": []anthropic.ToolParam{ - { - Name: "get_weather", - Description: anthropic.String("Get weather info"), - InputSchema: anthropic.ToolInputSchemaParam{ - Type: "object", - Properties: map[string]any{ - "location": map[string]any{"type": "string"}, - }, - }, - }, - }, - "tool_choice": map[string]any{"type": "auto"}, - "metadata": map[string]any{"user.id": "test123"}, - } - - rawBody, err := json.Marshal(parsedReq) - require.NoError(t, err) - - _, bodyMutation, err := translator.RequestBody(rawBody, parsedReq, false) - require.NoError(t, err) - require.NotNil(t, bodyMutation) - - var modifiedReq map[string]any - err = 
json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) - require.NoError(t, err) - - // Messages should be preserved. - require.Len(t, modifiedReq["messages"], 3) - - // Numeric fields get converted to float64 by JSON unmarshalling. - require.Equal(t, float64(1000), modifiedReq["max_tokens"]) - require.Equal(t, 0.7, modifiedReq["temperature"]) - require.Equal(t, 0.95, modifiedReq["top_p"]) - require.Equal(t, float64(40), modifiedReq["top_k"]) - - // Arrays become []interface{} by JSON unmarshalling. - stopSeq, ok := modifiedReq["stop_sequences"].([]any) - require.True(t, ok) - require.Len(t, stopSeq, 2) - require.Equal(t, "Human:", stopSeq[0]) - require.Equal(t, "Assistant:", stopSeq[1]) - - // Boolean false values are now included in the map. - require.Equal(t, false, modifiedReq["stream"]) - - // String values are preserved. - require.Equal(t, "You are a helpful assistant", modifiedReq["system"]) - - // Complex objects should be preserved as maps. - require.NotNil(t, modifiedReq["tools"]) - require.NotNil(t, modifiedReq["tool_choice"]) - require.NotNil(t, modifiedReq["metadata"]) - - // Verify model field is removed from body (it's in the path instead). - _, hasModel := modifiedReq["model"] - require.False(t, hasModel, "model field should be removed from request body") - - // Verify anthropic_version is added for AWS Bedrock. - version, hasVersion := modifiedReq["anthropic_version"] - require.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") - require.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") -} - func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { tests := []struct { name string diff --git a/site/docs/capabilities/llm-integrations/supported-endpoints.md b/site/docs/capabilities/llm-integrations/supported-endpoints.md index 949724cadb..cae217e991 100644 --- a/site/docs/capabilities/llm-integrations/supported-endpoints.md +++ b/site/docs/capabilities/llm-integrations/supported-endpoints.md @@ -230,26 +230,26 @@ curl $GATEWAY_URL/v1/models The following table summarizes which providers support which endpoints: | Provider | Chat Completions | Completions | Embeddings | Image Generation | Anthropic Messages | Notes | -|-------------------------------------------------------------------------------------------------------|:----------------:|:-----------:|:----------:|:----------------:|:------------------:|----------------------------------------------------------------------------------------------------------------------| -| [OpenAI](https://platform.openai.com/docs/api-reference) | ✅ | ✅ | ✅ | ✅ | ❌ | | -| [AWS Bedrock](https://docs.aws.amazon.com/bedrock/latest/APIReference/) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via API translation | -| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) | ✅ | 🚧 | ✅ | ⚠️ | ❌ | Via API translation or via [OpenAI-compatible API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/latest) | -| [Google Gemini](https://ai.google.dev/gemini-api/docs/openai) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [Groq](https://console.groq.com/docs/openai) | ✅ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | -| [Grok](https://docs.x.ai/docs/api-reference) | ✅ | ⚠️ | ❌ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [Together AI](https://docs.together.ai/docs/openai-api-compatibility) | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [Cohere](https://docs.cohere.com/v2/docs/compatibility-api) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| 
[Mistral](https://docs.mistral.ai/api/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| [DeepInfra](https://deepinfra.com/docs/inference) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | -| [DeepSeek](https://api-docs.deepseek.com/) | ⚠️ | ⚠️ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | -| [Hunyuan](https://cloud.tencent.com/document/product/1729/111007) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| [Tencent LLM Knowledge Engine](https://www.tencentcloud.com/document/product/1255/70381) | ⚠️ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | -| [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | -| [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via OpenAI-compatible API | -| [Anthropic on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | -| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ❌ | ✅ | Native Anthropic API | -| [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | ❌ | Via OpenAI-compatible API | -| [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| ----------------------------------------------------------------------------------------------------- | :--------------: | :---------: | :--------: | :--------------: | :----------------: | -------------------------------------------------------------------------------------------------------------------- | +| [OpenAI](https://platform.openai.com/docs/api-reference) | ✅ | ✅ | ✅ | ✅ | ❌ | | +| [AWS Bedrock](https://docs.aws.amazon.com/bedrock/latest/APIReference/) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via API translation | +| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) | ✅ | 🚧 | ✅ | ⚠️ | ❌ | Via API translation or via [OpenAI-compatible API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/latest) | +| [Google Gemini](https://ai.google.dev/gemini-api/docs/openai) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [Groq](https://console.groq.com/docs/openai) | ✅ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | +| [Grok](https://docs.x.ai/docs/api-reference) | ✅ | ⚠️ | ❌ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [Together AI](https://docs.together.ai/docs/openai-api-compatibility) | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [Cohere](https://docs.cohere.com/v2/docs/compatibility-api) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [Mistral](https://docs.mistral.ai/api/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [DeepInfra](https://deepinfra.com/docs/inference) | ✅ | ⚠️ | ✅ | ⚠️ | ❌ | Via OpenAI-compatible API | +| [DeepSeek](https://api-docs.deepseek.com/) | ⚠️ | ⚠️ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | +| [Hunyuan](https://cloud.tencent.com/document/product/1729/111007) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [Tencent LLM Knowledge Engine](https://www.tencentcloud.com/document/product/1255/70381) | ⚠️ | ❌ | ❌ | ❌ | ❌ | Via OpenAI-compatible API | +| [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | +| [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via OpenAI-compatible API | +| [Anthropic on Vertex 
AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ❌ | ✅ | Native Anthropic API | +| [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | ❌ | Via OpenAI-compatible API | +| [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | - ✅ - Supported and Tested on Envoy AI Gateway CI - ⚠️️ - Expected to work based on provider documentation, but not tested on the CI. diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml index c376fb2c72..29e2674c4b 100644 --- a/tests/extproc/envoy.yaml +++ b/tests/extproc/envoy.yaml @@ -58,6 +58,24 @@ static_resources: route: auto_host_rewrite: true cluster: aws_bedrock + - match: + prefix: "/" + headers: + - name: x-ai-eg-model + string_match: + exact: claude-sonnet-4-5 + route: + auto_host_rewrite: true + cluster: anthropic + - match: + prefix: "/" + headers: + - name: x-ai-eg-model + string_match: + exact: global.anthropic.claude-sonnet-4-5-20250929-v1:0 + route: + auto_host_rewrite: true + cluster: anthropic_aws_bedrock - match: prefix: "/" headers: @@ -728,6 +746,65 @@ static_resources: filter_metadata: aigateway.envoy.io: per_route_rule_backend_name: "aws-bedrock" + - name: anthropic_aws_bedrock + connect_timeout: 30s + type: STRICT_DNS + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. 
+ max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + allow_mode_override: true + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: anthropic_aws_bedrock + endpoints: + - lb_endpoints: + - endpoint: + hostname: bedrock-runtime.us-east-1.amazonaws.com + address: + socket_address: + address: bedrock-runtime.us-east-1.amazonaws.com + port_value: 443 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "anthropic-aws-bedrock" transport_socket: name: envoy.transport_sockets.tls typed_config: @@ -985,6 +1062,69 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext auto_host_sni: true + - name: anthropic + connect_timeout: 30s + type: STRICT_DNS + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. 
+ max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: anthropic + endpoints: + - lb_endpoints: + - endpoint: + hostname: api.anthropic.com + address: + socket_address: + address: api.anthropic.com + port_value: 443 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "anthropic" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + auto_host_sni: true - name: azure_openai connect_timeout: 30s type: STRICT_DNS diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 2306d0242c..ebe43bf053 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -45,6 +45,7 @@ var ( grokSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} sambaNovaSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} deepInfraSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1/openai"} + anthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAnthropic} testUpstreamOpenAIBackend = filterapi.Backend{Name: "testupstream-openai", Schema: openAISchema} testUpstreamModelNameOverride = filterapi.Backend{Name: "testupstream-modelname-override", ModelNameOverride: "override-model", Schema: openAISchema} diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index fc51add5a4..7c3fd4b37c 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -14,6 +14,8 @@ import ( "testing" "time" + "github.com/anthropics/anthropic-sdk-go" + anthropicoption "github.com/anthropics/anthropic-sdk-go/option" "github.com/openai/openai-go" "github.com/openai/openai-go/option" "github.com/stretchr/testify/assert" @@ -46,10 +48,17 @@ func TestWithRealProviders(t *testing.T) { {Name: "openai", Schema: openAISchema, Auth: &filterapi.BackendAuth{ APIKey: &filterapi.APIKeyAuth{Key: cc.OpenAIAPIKey}, }}, + {Name: "anthropic", Schema: anthropicSchema, Auth: &filterapi.BackendAuth{ + AnthropicAPIKey: &filterapi.AnthropicAPIKeyAuth{Key: cc.AnthropicAPIKey}, + }}, {Name: "aws-bedrock", Schema: awsBedrockSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ CredentialFileLiteral: cc.AWSFileLiteral, 
Region: "us-east-1", }}}, + {Name: "anthropic-aws-bedrock", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + CredentialFileLiteral: cc.AWSFileLiteral, + Region: "us-east-1", + }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, @@ -119,6 +128,17 @@ func TestWithRealProviders(t *testing.T) { }) } }) + t.Run("messages", func(t *testing.T) { + for _, tc := range []realProvidersTestCase{ + {name: "anthropic", modelName: "claude-sonnet-4-5", required: internaltesting.RequiredCredentialAnthropic}, + {name: "anthropic-aws-bedrock", modelName: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", required: internaltesting.RequiredCredentialAWS}, + } { + t.Run(tc.name, func(t *testing.T) { + cc.MaybeSkip(t, tc.required) + requireEventuallyMessagesNonStreamingRequestOK(t, listenerAddress, tc.modelName) + }) + } + }) }) // Read all access logs and check if the used token is logged. @@ -362,6 +382,33 @@ func requireEventuallyChatCompletionNonStreamingRequestOK(t *testing.T, listener }, realProvidersEventuallyTimeout, realProvidersEventuallyInterval) } +func requireEventuallyMessagesNonStreamingRequestOK(t *testing.T, listenerAddress, modelName string) { + client := anthropic.NewClient( + anthropicoption.WithAPIKey("dummy"), + anthropicoption.WithBaseURL(listenerAddress+"/anthropic/"), + ) + internaltesting.RequireEventuallyNoError(t, func() error { + message, err := client.Messages.New(t.Context(), anthropic.MessageNewParams{ + MaxTokens: 1024, + Messages: []anthropic.MessageParam{ + anthropic.NewUserMessage(anthropic.NewTextBlock("Say hi!")), + }, + Model: anthropic.Model(modelName), + }) + if err != nil { + t.Logf("messages error: %v", err) + return fmt.Errorf("messages error: %w", err) + } + + if len(message.Content) == 0 { + return fmt.Errorf("empty message content in response") + } + + t.Logf("response: %+v", message.Content) + return nil + }, realProvidersEventuallyTimeout, realProvidersEventuallyInterval) +} + func requireEventuallyEmbeddingsRequestOK(t *testing.T, listenerAddress, modelName string) { client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/")) require.Eventually(t, func() bool { diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go index 9c0f1b3251..7bcaadf041 100644 --- a/tests/extproc/testupstream_test.go +++ b/tests/extproc/testupstream_test.go @@ -893,7 +893,7 @@ data: {"type": "message_stop"} method: http.MethodPost, expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"}, requestBody: `{ - "model": "claude-sonnet-4-5", + "model": "foo", "max_tokens": 1000, "messages": [ { @@ -903,7 +903,7 @@ data: {"type": "message_stop"} ] }`, expPath: "/v1/messages", - responseBody: `{"model":"claude-sonnet-4-5-20250929","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`, + responseBody: `{"model":"foo","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 
👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`, expStatus: http.StatusOK, }, { @@ -914,7 +914,7 @@ data: {"type": "message_stop"} expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"}, responseType: "sse", requestBody: `{ - "model": "claude-sonnet-4-5", + "model": "foo", "max_tokens": 1000, "messages": [ { @@ -926,7 +926,7 @@ data: {"type": "message_stop"} expPath: "/v1/messages", responseBody: ` event: message_start -data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } +data: {"type":"message_start","message":{"model":"foo","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } event: content_block_start data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } @@ -960,7 +960,7 @@ data: {"type":"message_stop" } path: "/anthropic/v1/messages", method: http.MethodPost, requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false}`, - expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":100,"messages":[{"content":[{"text":"Hello from AWS!","type":"text"}],"role":"user"}],"stream":false}`, + expRequestBody: `{"max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false,"anthropic_version":"bedrock-2023-05-31"}`, expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", responseStatus: strconv.Itoa(http.StatusOK), responseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, @@ -974,7 +974,7 @@ data: {"type":"message_stop" } method: http.MethodPost, responseType: "sse", requestBody: `{"model":"anthropic.claude-3-haiku-20240307-v1:0","max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true}`, - expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":150,"messages":[{"content":[{"text":"Tell me a joke","type":"text"}],"role":"user"}],"stream":true}`, + expRequestBody: `{"max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true,"anthropic_version":"bedrock-2023-05-31"}`, expPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", responseStatus: strconv.Itoa(http.StatusOK), responseBody: `event: message_start @@ -1023,19 +1023,6 @@ data: {"type":"message_stop"} `, }, - { - name: "aws-anthropic - /anthropic/v1/messages - ARN model format", - backend: "aws-anthropic", - path: "/anthropic/v1/messages", - 
method: http.MethodPost, - requestBody: `{"model":"arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile/k375tnm6nr0t","max_tokens":50,"messages":[{"role":"user","content":[{"type":"text","text":"Hi"}]}],"stream":false}`, - expRequestBody: `{"anthropic_version":"bedrock-2023-05-31","max_tokens":50,"messages":[{"content":[{"text":"Hi","type":"text"}],"role":"user"}],"stream":false}`, - expPath: "/model/arn:aws:bedrock:eu-central-1:538639307912:application-inference-profile%2Fk375tnm6nr0t/invoke", - responseStatus: strconv.Itoa(http.StatusOK), - responseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, - expStatus: http.StatusOK, - expResponseBody: `{"id":"msg_arn_789","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":5,"output_tokens":8}}`, - }, { name: "aws-anthropic - /anthropic/v1/messages - error response", backend: "aws-anthropic", From c6c6aad9acc4cff501f00a296491112b70a8fce6 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Wed, 29 Oct 2025 14:38:04 -0700 Subject: [PATCH 15/15] cleanup Signed-off-by: Takeshi Yoneda --- tests/extproc/extproc_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index ebe43bf053..3979e225f5 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -61,10 +61,8 @@ var ( Region: "gcp-region", ProjectName: "gcp-project-name", }}} - testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ - Region: "us-east-1", - }}} - alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} + testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema} + alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} // envoyConfig is the embedded Envoy configuration template. //