diff --git a/api/v1alpha1/shared_types.go b/api/v1alpha1/shared_types.go
index 596a6a56e2..98e97942d1 100644
--- a/api/v1alpha1/shared_types.go
+++ b/api/v1alpha1/shared_types.go
@@ -15,7 +15,7 @@ package v1alpha1
type VersionedAPISchema struct {
// Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend.
//
- // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic
+ // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic
Name APISchema `json:"name"`
// Version is the version of the API schema.
@@ -65,6 +65,12 @@ const (
// APISchemaAnthropic is the native Anthropic API schema.
// https://docs.claude.com/en/home
APISchemaAnthropic APISchema = "Anthropic"
+ // APISchemaAWSAnthropic is the schema for Anthropic models hosted on AWS Bedrock.
+ // Uses the native Anthropic Messages API format for requests and responses.
+ //
+ // https://aws.amazon.com/bedrock/anthropic/
+ // https://docs.claude.com/en/api/claude-on-amazon-bedrock
+ APISchemaAWSAnthropic APISchema = "AWSAnthropic"
)
const (
diff --git a/examples/basic/aws.yaml b/examples/basic/aws.yaml
index 7bc37a4b2b..784972326c 100644
--- a/examples/basic/aws.yaml
+++ b/examples/basic/aws.yaml
@@ -23,6 +23,25 @@ spec:
- name: envoy-ai-gateway-basic-aws
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: AIGatewayRoute
+metadata:
+ name: envoy-ai-gateway-basic-aws-bedrock-anthropic
+ namespace: default
+spec:
+ parentRefs:
+ - name: envoy-ai-gateway-basic
+ kind: Gateway
+ group: gateway.networking.k8s.io
+ rules:
+ - matches:
+ - headers:
+ - type: Exact
+ name: x-ai-eg-model
+ value: anthropic.claude-3-5-sonnet-20241022-v2:0
+ backendRefs:
+ - name: envoy-ai-gateway-basic-aws-bedrock-anthropic
+---
+apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
name: envoy-ai-gateway-basic-aws
@@ -36,6 +55,20 @@ spec:
group: gateway.envoyproxy.io
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: AIServiceBackend
+metadata:
+ name: envoy-ai-gateway-basic-aws-bedrock-anthropic
+ namespace: default
+spec:
+ schema:
+ name: AWSAnthropic
+ version: bedrock-2023-05-31
+ backendRef:
+ name: envoy-ai-gateway-basic-aws
+ kind: Backend
+ group: gateway.envoyproxy.io
+---
+apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: BackendSecurityPolicy
metadata:
name: envoy-ai-gateway-basic-aws-credentials
@@ -45,6 +78,9 @@ spec:
- group: aigateway.envoyproxy.io
kind: AIServiceBackend
name: envoy-ai-gateway-basic-aws
+ - group: aigateway.envoyproxy.io
+ kind: AIServiceBackend
+ name: envoy-ai-gateway-basic-aws-bedrock-anthropic
type: AWSCredentials
awsCredentials:
region: us-east-1
@@ -85,6 +121,7 @@ metadata:
type: Opaque
stringData:
# Replace this with your AWS credentials.
+ # You can also use AWS IAM roles for service accounts (IRSA) in EKS.
credentials: |
[default]
aws_access_key_id = AWS_ACCESS_KEY_ID
diff --git a/internal/extproc/messages_processor.go b/internal/extproc/messages_processor.go
index 9a5ea3eb72..f6b48ec54f 100644
--- a/internal/extproc/messages_processor.go
+++ b/internal/extproc/messages_processor.go
@@ -157,10 +157,13 @@ func (c *messagesProcessorUpstreamFilter) selectTranslator(out filterapi.Version
// Anthropic → GCP Anthropic (request direction translator).
// Uses backend config version (GCP Vertex AI requires specific versions like "vertex-2023-10-16").
c.translator = translator.NewAnthropicToGCPAnthropicTranslator(out.Version, c.modelNameOverride)
+ case filterapi.APISchemaAWSAnthropic:
+ // Anthropic → AWS Bedrock Anthropic (request direction translator).
+ c.translator = translator.NewAnthropicToAWSAnthropicTranslator(out.Version, c.modelNameOverride)
case filterapi.APISchemaAnthropic:
c.translator = translator.NewAnthropicToAnthropicTranslator(out.Version, c.modelNameOverride)
default:
- return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (GCPAnthropic). Backend %s uses different model format", out.Name)
+ return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (Anthropic, GCPAnthropic, AWSAnthropic). Backend %s uses different model format", out.Name)
}
return nil
}
diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go
new file mode 100644
index 0000000000..891f7996eb
--- /dev/null
+++ b/internal/extproc/translator/anthropic_awsanthropic.go
@@ -0,0 +1,78 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package translator
+
+import (
+ "cmp"
+ "fmt"
+ "net/url"
+
+ corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+ extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+ "github.com/tidwall/sjson"
+
+ anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic"
+ "github.com/envoyproxy/ai-gateway/internal/internalapi"
+)
+
+// NewAnthropicToAWSAnthropicTranslator builds the translator used when an Anthropic Messages
+// request is sent to Anthropic models on AWS Bedrock.
+//
+// The result wraps a copy of the plain Anthropic-to-Anthropic translator and additionally
+// remembers apiVersion, which RequestBody writes into the request body. modelNameOverride is
+// forwarded unchanged to the wrapped translator's constructor.
+func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride internalapi.ModelNameOverride) AnthropicMessagesTranslator {
+	// The constructor returns the interface type; the concrete value is needed for embedding.
+	base := NewAnthropicToAnthropicTranslator(apiVersion, modelNameOverride).(*anthropicToAnthropicTranslator)
+
+	awsTranslator := new(anthropicToAWSAnthropicTranslator)
+	awsTranslator.anthropicToAnthropicTranslator = *base
+	awsTranslator.apiVersion = apiVersion
+	return awsTranslator
+}
+
+// anthropicToAWSAnthropicTranslator translates Anthropic Messages requests for AWS Bedrock.
+// It embeds the Anthropic-to-Anthropic translator and overrides RequestBody with the
+// Bedrock-specific body and path handling.
+type anthropicToAWSAnthropicTranslator struct {
+ // Embedded base translator; its stream, requestModel and modelNameOverride fields are
+ // read and written by RequestBody.
+ anthropicToAnthropicTranslator
+ // apiVersion is the value written to the "anthropic_version" field of the request body.
+ apiVersion string
+}
+
+// RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation.
+// This handles the transformation from native Anthropic format to AWS Bedrock format.
+// https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html
+func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *anthropicschema.MessagesRequest, _ bool) (
+ headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error,
+) {
+ a.stream = body.GetStream()
+ a.requestModel = cmp.Or(a.modelNameOverride, body.GetModel())
+
+ var mutatedBody []byte
+ mutatedBody, err = sjson.SetBytes(rawBody, anthropicVersionKey, a.apiVersion)
+ if err != nil {
+ return nil, nil, fmt.Errorf("failed to set anthropic_version field: %w", err)
+ }
+ // Remove the model field from the body as AWS Bedrock expects the model to be specified in the path.
+ // Otherwise, AWS complains "extra inputs are not permitted".
+ mutatedBody, _ = sjson.DeleteBytes(mutatedBody, "model")
+
+ // Determine the AWS Bedrock path based on whether streaming is requested.
+ var pathTemplate string
+ if body.GetStream() {
+ pathTemplate = "/model/%s/invoke-stream"
+ } else {
+ pathTemplate = "/model/%s/invoke"
+ }
+
+ // URL encode the model ID for the path to handle ARNs with special characters.
+ // AWS Bedrock model IDs can be simple names (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0")
+ // or full ARNs which may contain special characters.
+ encodedModelID := url.PathEscape(a.requestModel)
+ path := fmt.Sprintf(pathTemplate, encodedModelID)
+
+ headerMutation = &extprocv3.HeaderMutation{
+ SetHeaders: []*corev3.HeaderValueOption{
+ // Overwriting path of the Anthropic to Anthropic translator
+ {Header: &corev3.HeaderValue{Key: ":path", RawValue: []byte(path)}},
+ },
+ }
+ bodyMutation = &extprocv3.BodyMutation{Mutation: &extprocv3.BodyMutation_Body{Body: mutatedBody}}
+ setContentLength(headerMutation, mutatedBody)
+ return
+}
diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go
new file mode 100644
index 0000000000..90097c1f0c
--- /dev/null
+++ b/internal/extproc/translator/anthropic_awsanthropic_test.go
@@ -0,0 +1,227 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package translator
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/anthropics/anthropic-sdk-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic"
+)
+
+// TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride verifies that the effective
+// model (override if set, otherwise the request's model) ends up URL-escaped in the :path header,
+// that the model field is removed from the body, and that anthropic_version is added.
+func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *testing.T) {
+	tests := []struct {
+		name           string
+		override       string
+		inputModel     string
+		expectedInPath string
+	}{
+		{
+			name:           "no override uses original model",
+			override:       "",
+			inputModel:     "anthropic.claude-3-haiku-20240307-v1:0",
+			expectedInPath: "anthropic.claude-3-haiku-20240307-v1:0",
+		},
+		{
+			name:           "override replaces model in body and path",
+			override:       "anthropic.claude-3-sonnet-20240229-v1:0",
+			inputModel:     "anthropic.claude-3-haiku-20240307-v1:0",
+			expectedInPath: "anthropic.claude-3-sonnet-20240229-v1:0",
+		},
+		{
+			name:           "override with empty input model",
+			override:       "anthropic.claude-3-opus-20240229-v1:0",
+			inputModel:     "",
+			expectedInPath: "anthropic.claude-3-opus-20240229-v1:0",
+		},
+		{
+			name:           "model with ARN format",
+			override:       "",
+			inputModel:     "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa",
+			expectedInPath: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile%2Faaaaaaaaa",
+		},
+		{
+			name:           "global model ID",
+			override:       "",
+			inputModel:     "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
+			expectedInPath: "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", tt.override)
+
+			// Create the request using map structure.
+			originalReq := &anthropicschema.MessagesRequest{
+				"model": tt.inputModel,
+				"messages": []anthropic.MessageParam{
+					{
+						Role: anthropic.MessageParamRoleUser,
+						Content: []anthropic.ContentBlockParamUnion{
+							anthropic.NewTextBlock("Hello"),
+						},
+					},
+				},
+			}
+
+			rawBody, err := json.Marshal(originalReq)
+			require.NoError(t, err)
+
+			headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false)
+			require.NoError(t, err)
+			require.NotNil(t, headerMutation)
+			require.NotNil(t, bodyMutation)
+
+			// Look up the :path header by key instead of by position, so the check does not
+			// depend on how many other headers (e.g. content-length) the translator sets.
+			// If the key is set more than once, the last occurrence is the one verified.
+			var path string
+			var foundPath bool
+			for _, h := range headerMutation.SetHeaders {
+				if h.Header.Key == ":path" {
+					path, foundPath = string(h.Header.RawValue), true
+				}
+			}
+			require.True(t, foundPath, ":path header should be set")
+			assert.Equal(t, "/model/"+tt.expectedInPath+"/invoke", path)
+
+			// Check that model field is removed from body (since it's in the path).
+			var modifiedReq map[string]any
+			err = json.Unmarshal(bodyMutation.GetBody(), &modifiedReq)
+			require.NoError(t, err)
+			_, hasModel := modifiedReq["model"]
+			assert.False(t, hasModel, "model field should be removed from request body")
+
+			// Verify anthropic_version field is added (required by AWS Bedrock).
+			version, hasVersion := modifiedReq["anthropic_version"]
+			assert.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock")
+			assert.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version")
+		})
+	}
+}
+
+func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing.T) {
+ tests := []struct {
+ name string
+ stream any
+ expectedPathSuffix string
+ }{
+ {
+ name: "non-streaming uses /invoke",
+ stream: false,
+ expectedPathSuffix: "/invoke",
+ },
+ {
+ name: "streaming uses /invoke-stream",
+ stream: true,
+ expectedPathSuffix: "/invoke-stream",
+ },
+ {
+ name: "missing stream defaults to /invoke",
+ stream: nil,
+ expectedPathSuffix: "/invoke",
+ },
+ {
+ name: "non-boolean stream defaults to /invoke",
+ stream: "true",
+ expectedPathSuffix: "/invoke",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "")
+
+ parsedReq := &anthropicschema.MessagesRequest{
+ "model": "anthropic.claude-3-sonnet-20240229-v1:0",
+ "messages": []anthropic.MessageParam{
+ {
+ Role: anthropic.MessageParamRoleUser,
+ Content: []anthropic.ContentBlockParamUnion{
+ anthropic.NewTextBlock("Test"),
+ },
+ },
+ },
+ }
+ if tt.stream != nil {
+ if streamVal, ok := tt.stream.(bool); ok {
+ (*parsedReq)["stream"] = streamVal
+ }
+ }
+
+ rawBody, err := json.Marshal(parsedReq)
+ require.NoError(t, err)
+
+ headerMutation, _, err := translator.RequestBody(rawBody, parsedReq, false)
+ require.NoError(t, err)
+ require.NotNil(t, headerMutation)
+
+ // Check path contains expected suffix.
+ // Use the last element as it takes precedence when multiple headers are set.
+ pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2]
+ expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix
+ assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue))
+ })
+ }
+}
+
+func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) {
+ tests := []struct {
+ name string
+ modelID string
+ expectedPath string
+ }{
+ {
+ name: "simple model ID with colon",
+ modelID: "anthropic.claude-3-sonnet-20240229-v1:0",
+ expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+ },
+ {
+ name: "full ARN with multiple special characters",
+ modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0",
+ expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke",
+ },
+ {
+ name: "global model prefix",
+ modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
+ expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "")
+
+ originalReq := &anthropicschema.MessagesRequest{
+ "model": tt.modelID,
+ "messages": []anthropic.MessageParam{
+ {
+ Role: anthropic.MessageParamRoleUser,
+ Content: []anthropic.ContentBlockParamUnion{
+ anthropic.NewTextBlock("Test"),
+ },
+ },
+ },
+ }
+
+ rawBody, err := json.Marshal(originalReq)
+ require.NoError(t, err)
+
+ headerMutation, _, err := translator.RequestBody(rawBody, originalReq, false)
+ require.NoError(t, err)
+ require.NotNil(t, headerMutation)
+
+ // Use the last element as it takes precedence when multiple headers are set.
+ pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2]
+ assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue))
+ })
+ }
+}
diff --git a/internal/filterapi/filterconfig.go b/internal/filterapi/filterconfig.go
index 25ffc0c383..ad312c31d9 100644
--- a/internal/filterapi/filterconfig.go
+++ b/internal/filterapi/filterconfig.go
@@ -117,6 +117,9 @@ const (
APISchemaGCPAnthropic APISchemaName = "GCPAnthropic"
// APISchemaAnthropic represents the standard Anthropic API schema.
APISchemaAnthropic APISchemaName = "Anthropic"
+ // APISchemaAWSAnthropic represents the AWS Bedrock Anthropic API schema.
+ // Used for Claude models hosted on AWS Bedrock using the native Anthropic Messages API.
+ APISchemaAWSAnthropic APISchemaName = "AWSAnthropic"
)
// RouteRuleName is the name of the route rule.
diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml
index 6c2cf79190..f46b75d026 100644
--- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml
+++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml
@@ -235,6 +235,7 @@ spec:
- GCPVertexAI
- GCPAnthropic
- Anthropic
+ - AWSAnthropic
type: string
version:
description: |-
diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx
index 5598313aac..0d66d1b972 100644
--- a/site/docs/api/api.mdx
+++ b/site/docs/api/api.mdx
@@ -762,6 +762,11 @@ APISchema defines the API schema.
type="enum"
required="false"
description="APISchemaAnthropic is the native Anthropic API schema.
https://docs.claude.com/en/home
"
+/>
#### AWSCredentialsFile
diff --git a/site/docs/capabilities/llm-integrations/supported-endpoints.md b/site/docs/capabilities/llm-integrations/supported-endpoints.md
index 42d33e38c6..cae217e991 100644
--- a/site/docs/capabilities/llm-integrations/supported-endpoints.md
+++ b/site/docs/capabilities/llm-integrations/supported-endpoints.md
@@ -80,6 +80,7 @@ curl -H "Content-Type: application/json" \
- Anthropic
- GCP Anthropic
+- AWS Anthropic
**Example:**
@@ -246,6 +247,7 @@ The following table summarizes which providers support which endpoints:
| [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API |
| [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via OpenAI-compatible API |
| [Anthropic on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API |
+| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ❌ | ✅ | Native Anthropic API |
| [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | ❌ | Via OpenAI-compatible API |
| [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API |
diff --git a/site/docs/getting-started/connect-providers/aws-bedrock.md b/site/docs/getting-started/connect-providers/aws-bedrock.md
index c97274632d..6e6eafb6c5 100644
--- a/site/docs/getting-started/connect-providers/aws-bedrock.md
+++ b/site/docs/getting-started/connect-providers/aws-bedrock.md
@@ -6,7 +6,7 @@ sidebar_position: 3
# Connect AWS Bedrock
-This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models.
+This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models, including Llama, Anthropic Claude, and other models available on AWS Bedrock.
## Prerequisites
@@ -47,7 +47,8 @@ Your IAM policy needs these permissions:
"Action": [
"bedrock:InvokeModel",
"bedrock:InvokeModelWithResponseStream",
- "bedrock:ListFoundationModels"
+ "bedrock:ListFoundationModels",
+ "aws-marketplace:ViewSubscriptions"
],
"Resource": "*"
}
@@ -165,7 +166,44 @@ curl -H "Content-Type: application/json" -d '{
}' http://$GATEWAY_URL/v1/chat/completions
```
----
+You can also access an Anthropic model through the native Anthropic Messages endpoint:
+
+```shell
+curl -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ "max_tokens": 100
+ }' \
+ $GATEWAY_URL/anthropic/v1/messages
+```
+
+Expected output:
+
+```json
+{
+ "id": "msg_01XFDUDYJgAACzvnptvVoYEL",
+ "type": "message",
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "The capital of France is Paris."
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ "stop_reason": "end_turn",
+ "usage": {
+ "input_tokens": 13,
+ "output_tokens": 8
+ }
+}
+```
## Troubleshooting
@@ -230,6 +268,115 @@ spec:
- name: envoy-ai-gateway-basic-aws
```
+## Using Anthropic Native API
+
+When using Anthropic models on AWS Bedrock, you have two options:
+
+1. **OpenAI-compatible format** (`/v1/chat/completions`) - Works with most models but may not support all Anthropic-specific features
+2. **Native Anthropic API** (`/anthropic/v1/messages`) - Provides full access to Anthropic-specific features (only for Anthropic models)
+
+### Streaming with Native Anthropic API
+
+The native Anthropic API also supports streaming responses:
+
+```shell
+curl -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Count from 1 to 5."
+ }
+ ],
+ "max_tokens": 100,
+ "stream": true
+ }' \
+ $GATEWAY_URL/anthropic/v1/messages
+```
+
+## Advanced Features with Anthropic Models
+
+Since the gateway supports the native Anthropic API, you have full access to Anthropic-specific features:
+
+### Extended Thinking
+
+```shell
+curl -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?"
+ }
+ ],
+ "max_tokens": 8000,
+ "thinking": {
+ "type": "enabled",
+ "budget_tokens": 5000
+ }
+ }' \
+ $GATEWAY_URL/anthropic/v1/messages
+```
+
+### Prompt Caching
+
+```shell
+curl -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "system": [
+ {
+ "type": "text",
+ "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.",
+ "cache_control": {"type": "ephemeral"}
+ }
+ ],
+ "messages": [
+ {
+ "role": "user",
+ "content": "Write a function to calculate fibonacci numbers."
+ }
+ ],
+ "max_tokens": 500
+ }' \
+ $GATEWAY_URL/anthropic/v1/messages
+```
+
+### Tool Use (Function Calling)
+
+```shell
+curl -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the weather in San Francisco?"
+ }
+ ],
+ "max_tokens": 500,
+ "tools": [
+ {
+ "name": "get_weather",
+ "description": "Get the current weather in a given location",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA"
+ }
+ },
+ "required": ["location"]
+ }
+ }
+ ]
+ }' \
+ $GATEWAY_URL/anthropic/v1/messages
+```
+
[AIGatewayRouteRule]: ../../api/api.mdx#aigatewayrouterule
[model ID]: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
[Claude 3 Sonnet]: https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table
diff --git a/site/docs/getting-started/connect-providers/index.md b/site/docs/getting-started/connect-providers/index.md
index 2137c9ad63..fb23dc4abd 100644
--- a/site/docs/getting-started/connect-providers/index.md
+++ b/site/docs/getting-started/connect-providers/index.md
@@ -44,3 +44,4 @@ Choose your provider to get started:
- [Connect OpenAI](./openai.md)
- [Connect AWS Bedrock](./aws-bedrock.md)
- [Connect Azure OpenAI](./azure-openai.md)
+- [Connect GCP VertexAI](./gcp-vertexai.md)
diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml
index ffe1af0481..29e2674c4b 100644
--- a/tests/extproc/envoy.yaml
+++ b/tests/extproc/envoy.yaml
@@ -58,6 +58,24 @@ static_resources:
route:
auto_host_rewrite: true
cluster: aws_bedrock
+ - match:
+ prefix: "/"
+ headers:
+ - name: x-ai-eg-model
+ string_match:
+ exact: claude-sonnet-4-5
+ route:
+ auto_host_rewrite: true
+ cluster: anthropic
+ - match:
+ prefix: "/"
+ headers:
+ - name: x-ai-eg-model
+ string_match:
+ exact: global.anthropic.claude-sonnet-4-5-20250929-v1:0
+ route:
+ auto_host_rewrite: true
+ cluster: anthropic_aws_bedrock
- match:
prefix: "/"
headers:
@@ -222,6 +240,14 @@ static_resources:
exact: gcp-anthropicai
route:
cluster: testupstream-gcp-anthropicai
+ - match:
+ prefix: "/"
+ headers:
+ - name: x-test-backend
+ string_match:
+ exact: aws-anthropic
+ route:
+ cluster: testupstream-aws-anthropic
- match:
prefix: "/"
headers:
@@ -720,6 +746,65 @@ static_resources:
filter_metadata:
aigateway.envoy.io:
per_route_rule_backend_name: "aws-bedrock"
+ - name: anthropic_aws_bedrock
+ connect_timeout: 30s
+ type: STRICT_DNS
+ outlier_detection:
+ consecutive_5xx: 1
+ interval: 1s
+ base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured.
+ max_ejection_percent: 100
+ typed_extension_protocol_options:
+ envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
+ "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
+ explicit_http_config:
+ http_protocol_options: {}
+ http_filters:
+ - name: upstream_extproc
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+ allow_mode_override: true
+ request_attributes:
+ - xds.upstream_host_metadata
+ processing_mode:
+ request_header_mode: "SEND"
+ request_body_mode: "NONE"
+ response_header_mode: "SKIP"
+ response_body_mode: "NONE"
+ grpc_service:
+ envoy_grpc:
+ cluster_name: extproc_cluster
+ metadataOptions:
+ receivingNamespaces:
+ untyped:
+ - io.envoy.ai_gateway
+ - name: envoy.filters.http.header_mutation
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation
+ mutations:
+ request_mutations:
+ - append:
+ append_action: ADD_IF_ABSENT
+ header:
+ key: content-length
+ value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%"
+ - name: envoy.filters.http.upstream_codec
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec
+ load_assignment:
+ cluster_name: anthropic_aws_bedrock
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ hostname: bedrock-runtime.us-east-1.amazonaws.com
+ address:
+ socket_address:
+ address: bedrock-runtime.us-east-1.amazonaws.com
+ port_value: 443
+ metadata:
+ filter_metadata:
+ aigateway.envoy.io:
+ per_route_rule_backend_name: "anthropic-aws-bedrock"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
@@ -843,6 +928,65 @@ static_resources:
filter_metadata:
aigateway.envoy.io:
per_route_rule_backend_name: "testupstream-gcp-anthropicai"
+ - name: testupstream-aws-anthropic
+ connect_timeout: 0.25s
+ type: STATIC
+ lb_policy: ROUND_ROBIN
+ outlier_detection:
+ consecutive_5xx: 1
+ interval: 1s
+ base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured.
+ max_ejection_percent: 100
+ typed_extension_protocol_options:
+ envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
+ "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
+ explicit_http_config:
+ http_protocol_options: {}
+ http_filters:
+ - name: upstream_extproc
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+ request_attributes:
+ - xds.upstream_host_metadata
+ processing_mode:
+ request_header_mode: "SEND"
+ request_body_mode: "NONE"
+ response_header_mode: "SKIP"
+ response_body_mode: "NONE"
+ grpc_service:
+ envoy_grpc:
+ cluster_name: extproc_cluster
+ metadataOptions:
+ receivingNamespaces:
+ untyped:
+ - io.envoy.ai_gateway
+ - name: envoy.filters.http.header_mutation
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation
+ mutations:
+ request_mutations:
+ - append:
+ append_action: ADD_IF_ABSENT
+ header:
+ key: content-length
+ value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%"
+ - name: envoy.filters.http.upstream_codec
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec
+ load_assignment:
+ cluster_name: testupstream-aws-anthropic
+ endpoints:
+ - priority: 0
+ lb_endpoints:
+ - endpoint:
+ address:
+ socket_address:
+ address: 127.0.0.1
+ port_value: 8080
+ metadata:
+ filter_metadata:
+ aigateway.envoy.io:
+ per_route_rule_backend_name: "testupstream-aws-anthropic"
- name: openai
connect_timeout: 30s
type: STRICT_DNS
@@ -918,6 +1062,69 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
auto_host_sni: true
+ - name: anthropic
+ connect_timeout: 30s
+ type: STRICT_DNS
+ outlier_detection:
+ consecutive_5xx: 1
+ interval: 1s
+ base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured.
+ max_ejection_percent: 100
+ typed_extension_protocol_options:
+ envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
+ "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
+ explicit_http_config:
+ http_protocol_options: {}
+ http_filters:
+ - name: upstream_extproc
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+ request_attributes:
+ - xds.upstream_host_metadata
+ processing_mode:
+ request_header_mode: "SEND"
+ request_body_mode: "NONE"
+ response_header_mode: "SKIP"
+ response_body_mode: "NONE"
+ grpc_service:
+ envoy_grpc:
+ cluster_name: extproc_cluster
+ metadataOptions:
+ receivingNamespaces:
+ untyped:
+ - io.envoy.ai_gateway
+ - name: envoy.filters.http.header_mutation
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation
+ mutations:
+ request_mutations:
+ - append:
+ append_action: ADD_IF_ABSENT
+ header:
+ key: content-length
+ value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%"
+ - name: envoy.filters.http.upstream_codec
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec
+ load_assignment:
+ cluster_name: anthropic
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ hostname: api.anthropic.com
+ address:
+ socket_address:
+ address: api.anthropic.com
+ port_value: 443
+ metadata:
+ filter_metadata:
+ aigateway.envoy.io:
+ per_route_rule_backend_name: "anthropic"
+ transport_socket:
+ name: envoy.transport_sockets.tls
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+ auto_host_sni: true
- name: azure_openai
connect_timeout: 30s
type: STRICT_DNS
diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go
index 5992b65565..3979e225f5 100644
--- a/tests/extproc/extproc_test.go
+++ b/tests/extproc/extproc_test.go
@@ -36,6 +36,7 @@ const (
var (
openAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"}
awsBedrockSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSBedrock}
+ awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic, Version: "bedrock-2023-05-31"}
azureOpenAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAzureOpenAI, Version: "2025-01-01-preview"}
gcpVertexAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPVertexAI}
gcpAnthropicAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPAnthropic, Version: "vertex-2023-10-16"}
@@ -44,6 +45,7 @@ var (
grokSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"}
sambaNovaSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"}
deepInfraSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1/openai"}
+ anthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAnthropic}
testUpstreamOpenAIBackend = filterapi.Backend{Name: "testupstream-openai", Schema: openAISchema}
testUpstreamModelNameOverride = filterapi.Backend{Name: "testupstream-modelname-override", ModelNameOverride: "override-model", Schema: openAISchema}
@@ -59,7 +61,8 @@ var (
Region: "gcp-region",
ProjectName: "gcp-project-name",
}}}
- alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema}
+ testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema}
+ alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema}
// envoyConfig is the embedded Envoy configuration template.
//
diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go
index fc51add5a4..7c3fd4b37c 100644
--- a/tests/extproc/real_providers_test.go
+++ b/tests/extproc/real_providers_test.go
@@ -14,6 +14,8 @@ import (
"testing"
"time"
+ "github.com/anthropics/anthropic-sdk-go"
+ anthropicoption "github.com/anthropics/anthropic-sdk-go/option"
"github.com/openai/openai-go"
"github.com/openai/openai-go/option"
"github.com/stretchr/testify/assert"
@@ -46,10 +48,17 @@ func TestWithRealProviders(t *testing.T) {
{Name: "openai", Schema: openAISchema, Auth: &filterapi.BackendAuth{
APIKey: &filterapi.APIKeyAuth{Key: cc.OpenAIAPIKey},
}},
+ {Name: "anthropic", Schema: anthropicSchema, Auth: &filterapi.BackendAuth{
+ AnthropicAPIKey: &filterapi.AnthropicAPIKeyAuth{Key: cc.AnthropicAPIKey},
+ }},
{Name: "aws-bedrock", Schema: awsBedrockSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{
CredentialFileLiteral: cc.AWSFileLiteral,
Region: "us-east-1",
}}},
+ {Name: "anthropic-aws-bedrock", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{
+ CredentialFileLiteral: cc.AWSFileLiteral,
+ Region: "us-east-1",
+ }}},
{Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{
AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken},
}},
@@ -119,6 +128,17 @@ func TestWithRealProviders(t *testing.T) {
})
}
})
+ t.Run("messages", func(t *testing.T) {
+ for _, tc := range []realProvidersTestCase{
+ {name: "anthropic", modelName: "claude-sonnet-4-5", required: internaltesting.RequiredCredentialAnthropic},
+ {name: "anthropic-aws-bedrock", modelName: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", required: internaltesting.RequiredCredentialAWS},
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ cc.MaybeSkip(t, tc.required)
+ requireEventuallyMessagesNonStreamingRequestOK(t, listenerAddress, tc.modelName)
+ })
+ }
+ })
})
// Read all access logs and check if the used token is logged.
@@ -362,6 +382,33 @@ func requireEventuallyChatCompletionNonStreamingRequestOK(t *testing.T, listener
}, realProvidersEventuallyTimeout, realProvidersEventuallyInterval)
}
+// requireEventuallyMessagesNonStreamingRequestOK issues a non-streaming Anthropic
+// Messages request for modelName through the listener at listenerAddress, using
+// the "/anthropic/" path prefix as the SDK base URL.
+//
+// The single-attempt closure is handed to internaltesting.RequireEventuallyNoError
+// together with realProvidersEventuallyTimeout and realProvidersEventuallyInterval.
+// An attempt reports an error when the SDK call fails or when the returned message
+// carries no content blocks; otherwise the content is logged and nil is returned.
+func requireEventuallyMessagesNonStreamingRequestOK(t *testing.T, listenerAddress, modelName string) {
+	// "dummy" is only a placeholder key for the SDK client.
+	// NOTE(review): presumably the gateway attaches the real upstream credentials — confirm.
+	baseURL := listenerAddress + "/anthropic/"
+	client := anthropic.NewClient(
+		anthropicoption.WithAPIKey("dummy"),
+		anthropicoption.WithBaseURL(baseURL),
+	)
+
+	attempt := func() error {
+		params := anthropic.MessageNewParams{
+			Model:     anthropic.Model(modelName),
+			MaxTokens: 1024,
+			Messages: []anthropic.MessageParam{
+				anthropic.NewUserMessage(anthropic.NewTextBlock("Say hi!")),
+			},
+		}
+
+		resp, err := client.Messages.New(t.Context(), params)
+		switch {
+		case err != nil:
+			// Log every failed attempt so intermediate errors are visible in the test output.
+			t.Logf("messages error: %v", err)
+			return fmt.Errorf("messages error: %w", err)
+		case len(resp.Content) == 0:
+			return fmt.Errorf("empty message content in response")
+		}
+
+		t.Logf("response: %+v", resp.Content)
+		return nil
+	}
+
+	internaltesting.RequireEventuallyNoError(t, attempt, realProvidersEventuallyTimeout, realProvidersEventuallyInterval)
+}
+
func requireEventuallyEmbeddingsRequestOK(t *testing.T, listenerAddress, modelName string) {
client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/"))
require.Eventually(t, func() bool {
diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go
index 550ab7d579..0b57520bc6 100644
--- a/tests/extproc/testupstream_test.go
+++ b/tests/extproc/testupstream_test.go
@@ -61,6 +61,7 @@ func TestWithTestUpstream(t *testing.T) {
testUpstreamAzureBackend,
testUpstreamGCPVertexAIBackend,
testUpstreamGCPAnthropicAIBackend,
+ testUpstreamAWSAnthropicBackend,
{
Name: "testupstream-openai-5xx", Schema: openAISchema, HeaderMutation: &filterapi.HTTPHeaderMutation{
Set: []filterapi.HTTPHeader{{Name: testupstreamlib.ResponseStatusKey, Value: "500"}},
@@ -892,7 +893,7 @@ data: {"type": "message_stop"}
method: http.MethodPost,
expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"},
requestBody: `{
- "model": "claude-sonnet-4-5",
+ "model": "foo",
"max_tokens": 1000,
"messages": [
{
@@ -902,7 +903,7 @@ data: {"type": "message_stop"}
]
}`,
expPath: "/v1/messages",
- responseBody: `{"model":"claude-sonnet-4-5-20250929","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`,
+ responseBody: `{"model":"foo","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`,
expStatus: http.StatusOK,
},
{
@@ -913,7 +914,7 @@ data: {"type": "message_stop"}
expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"},
responseType: "sse",
requestBody: `{
- "model": "claude-sonnet-4-5",
+ "model": "foo",
"max_tokens": 1000,
"messages": [
{
@@ -925,7 +926,7 @@ data: {"type": "message_stop"}
expPath: "/v1/messages",
responseBody: `
event: message_start
-data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} }
+data: {"type":"message_start","message":{"model":"foo","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
@@ -953,6 +954,87 @@ data: {"type":"message_stop" }
`,
expStatus: http.StatusOK,
},
+ {
+ name: "aws-anthropic - /anthropic/v1/messages",
+ backend: "aws-anthropic",
+ path: "/anthropic/v1/messages",
+ method: http.MethodPost,
+ requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false}`,
+ expRequestBody: `{"max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false,"anthropic_version":"bedrock-2023-05-31"}`,
+ expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+ responseStatus: strconv.Itoa(http.StatusOK),
+ responseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`,
+ expStatus: http.StatusOK,
+ expResponseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`,
+ },
+ {
+ name: "aws-anthropic - /anthropic/v1/messages - streaming",
+ backend: "aws-anthropic",
+ path: "/anthropic/v1/messages",
+ method: http.MethodPost,
+ responseType: "sse",
+ requestBody: `{"model":"anthropic.claude-3-haiku-20240307-v1:0","max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true}`,
+ expRequestBody: `{"max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true,"anthropic_version":"bedrock-2023-05-31"}`,
+ expPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream",
+ responseStatus: strconv.Itoa(http.StatusOK),
+ responseBody: `event: message_start
+data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}}
+
+event: message_stop
+data: {"type":"message_stop"}
+
+`,
+ expStatus: http.StatusOK,
+ expResponseBody: `event: message_start
+data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}}
+
+event: message_stop
+data: {"type":"message_stop"}
+
+`,
+ },
+ {
+ name: "aws-anthropic - /anthropic/v1/messages - error response",
+ backend: "aws-anthropic",
+ path: "/anthropic/v1/messages",
+ method: http.MethodPost,
+ requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Test error"}]}]}`,
+ expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+ responseStatus: strconv.Itoa(http.StatusBadRequest),
+ responseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`,
+ expStatus: http.StatusBadRequest,
+ expResponseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`,
+ },
} {
t.Run(tc.name, func(t *testing.T) {
listenerAddress := fmt.Sprintf("http://localhost:%d", listenerPort)