diff --git a/api/v1alpha1/shared_types.go b/api/v1alpha1/shared_types.go index 596a6a56e2..98e97942d1 100644 --- a/api/v1alpha1/shared_types.go +++ b/api/v1alpha1/shared_types.go @@ -15,7 +15,7 @@ package v1alpha1 type VersionedAPISchema struct { // Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend. // - // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic + // +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic Name APISchema `json:"name"` // Version is the version of the API schema. @@ -65,6 +65,12 @@ const ( // APISchemaAnthropic is the native Anthropic API schema. // https://docs.claude.com/en/home APISchemaAnthropic APISchema = "Anthropic" + // APISchemaAWSAnthropic is the schema for Anthropic models hosted on AWS Bedrock. + // Uses the native Anthropic Messages API format for requests and responses. + // + // https://aws.amazon.com/bedrock/anthropic/ + // https://docs.claude.com/en/api/claude-on-amazon-bedrock + APISchemaAWSAnthropic APISchema = "AWSAnthropic" ) const ( diff --git a/examples/basic/aws.yaml b/examples/basic/aws.yaml index 7bc37a4b2b..784972326c 100644 --- a/examples/basic/aws.yaml +++ b/examples/basic/aws.yaml @@ -23,6 +23,25 @@ spec: - name: envoy-ai-gateway-basic-aws --- apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + parentRefs: + - name: envoy-ai-gateway-basic + kind: Gateway + group: gateway.networking.k8s.io + rules: + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: anthropic.claude-3-5-sonnet-20241022-v2:0 + backendRefs: + - name: envoy-ai-gateway-basic-aws-bedrock-anthropic +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend metadata: name: envoy-ai-gateway-basic-aws @@ -36,6 +55,20 @@ spec: group: gateway.envoyproxy.io --- apiVersion: 
aigateway.envoyproxy.io/v1alpha1 +kind: AIServiceBackend +metadata: + name: envoy-ai-gateway-basic-aws-bedrock-anthropic + namespace: default +spec: + schema: + name: AWSAnthropic + version: bedrock-2023-05-31 + backendRef: + name: envoy-ai-gateway-basic-aws + kind: Backend + group: gateway.envoyproxy.io +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: BackendSecurityPolicy metadata: name: envoy-ai-gateway-basic-aws-credentials @@ -45,6 +78,9 @@ spec: - group: aigateway.envoyproxy.io kind: AIServiceBackend name: envoy-ai-gateway-basic-aws + - group: aigateway.envoyproxy.io + kind: AIServiceBackend + name: envoy-ai-gateway-basic-aws-bedrock-anthropic type: AWSCredentials awsCredentials: region: us-east-1 @@ -85,6 +121,7 @@ metadata: type: Opaque stringData: # Replace this with your AWS credentials. + # You can also use AWS IAM roles for service accounts (IRSA) in EKS. credentials: | [default] aws_access_key_id = AWS_ACCESS_KEY_ID diff --git a/internal/extproc/messages_processor.go b/internal/extproc/messages_processor.go index 9a5ea3eb72..f6b48ec54f 100644 --- a/internal/extproc/messages_processor.go +++ b/internal/extproc/messages_processor.go @@ -157,10 +157,13 @@ func (c *messagesProcessorUpstreamFilter) selectTranslator(out filterapi.Version // Anthropic → GCP Anthropic (request direction translator). // Uses backend config version (GCP Vertex AI requires specific versions like "vertex-2023-10-16"). c.translator = translator.NewAnthropicToGCPAnthropicTranslator(out.Version, c.modelNameOverride) + case filterapi.APISchemaAWSAnthropic: + // Anthropic → AWS Bedrock Anthropic (request direction translator). + c.translator = translator.NewAnthropicToAWSAnthropicTranslator(out.Version, c.modelNameOverride) case filterapi.APISchemaAnthropic: c.translator = translator.NewAnthropicToAnthropicTranslator(out.Version, c.modelNameOverride) default: - return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (GCPAnthropic). 
Backend %s uses different model format", out.Name) + return fmt.Errorf("/v1/messages endpoint only supports backends that return native Anthropic format (Anthropic, GCPAnthropic, AWSAnthropic). Backend %s uses different model format", out.Name) } return nil } diff --git a/internal/extproc/translator/anthropic_awsanthropic.go b/internal/extproc/translator/anthropic_awsanthropic.go new file mode 100644 index 0000000000..891f7996eb --- /dev/null +++ b/internal/extproc/translator/anthropic_awsanthropic.go @@ -0,0 +1,78 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "cmp" + "fmt" + "net/url" + + corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "github.com/tidwall/sjson" + + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" + "github.com/envoyproxy/ai-gateway/internal/internalapi" +) + +// NewAnthropicToAWSAnthropicTranslator creates a translator for Anthropic to AWS Bedrock Anthropic format. +// AWS Bedrock supports the native Anthropic Messages API, so this is essentially a passthrough +// translator with AWS-specific path modifications. +func NewAnthropicToAWSAnthropicTranslator(apiVersion string, modelNameOverride internalapi.ModelNameOverride) AnthropicMessagesTranslator { + anthropicTranslator := NewAnthropicToAnthropicTranslator(apiVersion, modelNameOverride).(*anthropicToAnthropicTranslator) + return &anthropicToAWSAnthropicTranslator{ + apiVersion: apiVersion, + anthropicToAnthropicTranslator: *anthropicTranslator, + } +} + +type anthropicToAWSAnthropicTranslator struct { + anthropicToAnthropicTranslator + apiVersion string +} + +// RequestBody implements [AnthropicMessagesTranslator.RequestBody] for Anthropic to AWS Bedrock Anthropic translation. 
+// This handles the transformation from native Anthropic format to AWS Bedrock format. +// https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html +func (a *anthropicToAWSAnthropicTranslator) RequestBody(rawBody []byte, body *anthropicschema.MessagesRequest, _ bool) ( + headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, +) { + a.stream = body.GetStream() + a.requestModel = cmp.Or(a.modelNameOverride, body.GetModel()) + + var mutatedBody []byte + mutatedBody, err = sjson.SetBytes(rawBody, anthropicVersionKey, a.apiVersion) + if err != nil { + return nil, nil, fmt.Errorf("failed to set anthropic_version field: %w", err) + } + // Remove the model field from the body as AWS Bedrock expects the model to be specified in the path. + // Otherwise, AWS complains "extra inputs are not permitted". + mutatedBody, _ = sjson.DeleteBytes(mutatedBody, "model") + + // Determine the AWS Bedrock path based on whether streaming is requested. + var pathTemplate string + if body.GetStream() { + pathTemplate = "/model/%s/invoke-stream" + } else { + pathTemplate = "/model/%s/invoke" + } + + // URL encode the model ID for the path to handle ARNs with special characters. + // AWS Bedrock model IDs can be simple names (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0") + // or full ARNs which may contain special characters. 
+ encodedModelID := url.PathEscape(a.requestModel) + path := fmt.Sprintf(pathTemplate, encodedModelID) + + headerMutation = &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{ + // Overwriting path of the Anthropic to Anthropic translator + {Header: &corev3.HeaderValue{Key: ":path", RawValue: []byte(path)}}, + }, + } + bodyMutation = &extprocv3.BodyMutation{Mutation: &extprocv3.BodyMutation_Body{Body: mutatedBody}} + setContentLength(headerMutation, mutatedBody) + return +} diff --git a/internal/extproc/translator/anthropic_awsanthropic_test.go b/internal/extproc/translator/anthropic_awsanthropic_test.go new file mode 100644 index 0000000000..90097c1f0c --- /dev/null +++ b/internal/extproc/translator/anthropic_awsanthropic_test.go @@ -0,0 +1,227 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package translator + +import ( + "encoding/json" + "testing" + + "github.com/anthropics/anthropic-sdk-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + anthropicschema "github.com/envoyproxy/ai-gateway/internal/apischema/anthropic" +) + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_ModelNameOverride(t *testing.T) { + tests := []struct { + name string + override string + inputModel string + expectedModel string + expectedInPath string + }{ + { + name: "no override uses original model", + override: "", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedInPath: "anthropic.claude-3-haiku-20240307-v1:0", + }, + { + name: "override replaces model in body and path", + override: "anthropic.claude-3-sonnet-20240229-v1:0", + inputModel: "anthropic.claude-3-haiku-20240307-v1:0", + expectedModel: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedInPath: "anthropic.claude-3-sonnet-20240229-v1:0", + }, + { + name: 
"override with empty input model", + override: "anthropic.claude-3-opus-20240229-v1:0", + inputModel: "", + expectedModel: "anthropic.claude-3-opus-20240229-v1:0", + expectedInPath: "anthropic.claude-3-opus-20240229-v1:0", + }, + { + name: "model with ARN format", + override: "", + inputModel: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa", + expectedModel: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile/aaaaaaaaa", + expectedInPath: "arn:aws:bedrock:eu-central-1:000000000:application-inference-profile%2Faaaaaaaaa", + }, + { + name: "global model ID", + override: "", + inputModel: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedModel: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedInPath: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", tt.override) + + // Create the request using map structure. + originalReq := &anthropicschema.MessagesRequest{ + "model": tt.inputModel, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Hello"), + }, + }, + }, + } + + rawBody, err := json.Marshal(originalReq) + require.NoError(t, err) + + headerMutation, bodyMutation, err := translator.RequestBody(rawBody, originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + require.NotNil(t, bodyMutation) + + // Check path header contains expected model (URL encoded). + // Use the last element as it takes precedence when multiple headers are set. 
+ pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] + require.Equal(t, ":path", pathHeader.Header.Key) + expectedPath := "/model/" + tt.expectedInPath + "/invoke" + assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) + + // Check that model field is removed from body (since it's in the path). + var modifiedReq map[string]any + err = json.Unmarshal(bodyMutation.GetBody(), &modifiedReq) + require.NoError(t, err) + _, hasModel := modifiedReq["model"] + assert.False(t, hasModel, "model field should be removed from request body") + + // Verify anthropic_version field is added (required by AWS Bedrock). + version, hasVersion := modifiedReq["anthropic_version"] + assert.True(t, hasVersion, "anthropic_version should be added for AWS Bedrock") + assert.Equal(t, "bedrock-2023-05-31", version, "anthropic_version should match the configured version") + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_RequestBody_StreamingPaths(t *testing.T) { + tests := []struct { + name string + stream any + expectedPathSuffix string + }{ + { + name: "non-streaming uses /invoke", + stream: false, + expectedPathSuffix: "/invoke", + }, + { + name: "streaming uses /invoke-stream", + stream: true, + expectedPathSuffix: "/invoke-stream", + }, + { + name: "missing stream defaults to /invoke", + stream: nil, + expectedPathSuffix: "/invoke", + }, + { + name: "non-boolean stream defaults to /invoke", + stream: "true", + expectedPathSuffix: "/invoke", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + parsedReq := &anthropicschema.MessagesRequest{ + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + if tt.stream != nil { + if streamVal, ok := tt.stream.(bool); ok { + 
(*parsedReq)["stream"] = streamVal + } + } + + rawBody, err := json.Marshal(parsedReq) + require.NoError(t, err) + + headerMutation, _, err := translator.RequestBody(rawBody, parsedReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + + // Check path contains expected suffix. + // The :path entry is second to last; the final entry is expected to be the content-length header added by setContentLength. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] + expectedPath := "/model/anthropic.claude-3-sonnet-20240229-v1:0" + tt.expectedPathSuffix + assert.Equal(t, expectedPath, string(pathHeader.Header.RawValue)) + }) + } +} + +func TestAnthropicToAWSAnthropicTranslator_URLEncoding(t *testing.T) { + tests := []struct { + name string + modelID string + expectedPath string + }{ + { + name: "simple model ID with colon", + modelID: "anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + }, + { + name: "full ARN with multiple special characters", + modelID: "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + expectedPath: "/model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3-sonnet-20240229-v1:0/invoke", + }, + { + name: "global model prefix", + modelID: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + expectedPath: "/model/global.anthropic.claude-sonnet-4-5-20250929-v1:0/invoke", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + translator := NewAnthropicToAWSAnthropicTranslator("bedrock-2023-05-31", "") + + originalReq := &anthropicschema.MessagesRequest{ + "model": tt.modelID, + "messages": []anthropic.MessageParam{ + { + Role: anthropic.MessageParamRoleUser, + Content: []anthropic.ContentBlockParamUnion{ + anthropic.NewTextBlock("Test"), + }, + }, + }, + } + + rawBody, err := json.Marshal(originalReq) + require.NoError(t, err) + + headerMutation, _, err := translator.RequestBody(rawBody, 
originalReq, false) + require.NoError(t, err) + require.NotNil(t, headerMutation) + + // The :path entry is second to last; the final entry is expected to be the content-length header added by setContentLength. + pathHeader := headerMutation.SetHeaders[len(headerMutation.SetHeaders)-2] + assert.Equal(t, tt.expectedPath, string(pathHeader.Header.RawValue)) + }) + } +} diff --git a/internal/filterapi/filterconfig.go b/internal/filterapi/filterconfig.go index 25ffc0c383..ad312c31d9 100644 --- a/internal/filterapi/filterconfig.go +++ b/internal/filterapi/filterconfig.go @@ -117,6 +117,9 @@ const ( APISchemaGCPAnthropic APISchemaName = "GCPAnthropic" // APISchemaAnthropic represents the standard Anthropic API schema. APISchemaAnthropic APISchemaName = "Anthropic" + // APISchemaAWSAnthropic represents the AWS Bedrock Anthropic API schema. + // Used for Claude models hosted on AWS Bedrock using the native Anthropic Messages API. + APISchemaAWSAnthropic APISchemaName = "AWSAnthropic" ) // RouteRuleName is the name of the route rule. diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml index 6c2cf79190..f46b75d026 100644 --- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml +++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aiservicebackends.yaml @@ -235,6 +235,7 @@ spec: - GCPVertexAI - GCPAnthropic - Anthropic + - AWSAnthropic type: string version: description: |- diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx index 5598313aac..0d66d1b972 100644 --- a/site/docs/api/api.mdx +++ b/site/docs/api/api.mdx @@ -762,6 +762,11 @@ APISchema defines the API schema. type="enum" required="false" description="APISchemaAnthropic is the native Anthropic API schema.
https://docs.claude.com/en/home
" +/> #### AWSCredentialsFile diff --git a/site/docs/capabilities/llm-integrations/supported-endpoints.md b/site/docs/capabilities/llm-integrations/supported-endpoints.md index 42d33e38c6..cae217e991 100644 --- a/site/docs/capabilities/llm-integrations/supported-endpoints.md +++ b/site/docs/capabilities/llm-integrations/supported-endpoints.md @@ -80,6 +80,7 @@ curl -H "Content-Type: application/json" \ - Anthropic - GCP Anthropic +- AWS Anthropic **Example:** @@ -246,6 +247,7 @@ The following table summarizes which providers support which endpoints: | [Tetrate Agent Router Service (TARS)](https://router.tetrate.ai/) | ⚠️ | ⚠️ | ⚠️ | ❌ | ❌ | Via OpenAI-compatible API | | [Google Vertex AI](https://cloud.google.com/vertex-ai/docs/reference/rest) | ✅ | 🚧 | 🚧 | ❌ | ❌ | Via OpenAI-compatible API | | [Anthropic on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude) | ✅ | ❌ | 🚧 | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | +| [Anthropic on AWS Bedrock](https://aws.amazon.com/bedrock/anthropic/) | 🚧 | ❌ | ❌ | ❌ | ✅ | Native Anthropic API | | [SambaNova](https://docs.sambanova.ai/sambastudio/latest/open-ai-api.html) | ✅ | ⚠️ | ✅ | ❌ | ❌ | Via OpenAI-compatible API | | [Anthropic](https://docs.claude.com/en/home) | ✅ | ❌ | ❌ | ❌ | ✅ | Via OpenAI-compatible API and Native Anthropic API | diff --git a/site/docs/getting-started/connect-providers/aws-bedrock.md b/site/docs/getting-started/connect-providers/aws-bedrock.md index c97274632d..6e6eafb6c5 100644 --- a/site/docs/getting-started/connect-providers/aws-bedrock.md +++ b/site/docs/getting-started/connect-providers/aws-bedrock.md @@ -6,7 +6,7 @@ sidebar_position: 3 # Connect AWS Bedrock -This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models. +This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models, including Llama, Anthropic Claude, and other models available on AWS Bedrock. 
## Prerequisites @@ -47,7 +47,8 @@ Your IAM policy needs these permissions: "Action": [ "bedrock:InvokeModel", "bedrock:InvokeModelWithResponseStream", - "bedrock:ListFoundationModels" + "bedrock:ListFoundationModels", + "aws-marketplace:ViewSubscriptions" ], "Resource": "*" } @@ -165,7 +166,44 @@ curl -H "Content-Type: application/json" -d '{ }' http://$GATEWAY_URL/v1/chat/completions ``` ---- +You can also access an Anthropic model through the native Anthropic Messages endpoint: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is capital of France?" + } + ], + "max_tokens": 100 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +Expected output: + +```json +{ + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "The capital of France is Paris." + } + ], + "model": "claude-3-5-sonnet-20241022", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 13, + "output_tokens": 8 + } +} +``` ## Troubleshooting @@ -230,6 +268,115 @@ spec: - name: envoy-ai-gateway-basic-aws ``` +## Using Anthropic Native API + +When using Anthropic models on AWS Bedrock, you have two options: + +1. **OpenAI-compatible format** (`/v1/chat/completions`) - Works with most models but may not support all Anthropic-specific features +2. **Native Anthropic API** (`/anthropic/v1/messages`) - Provides full access to Anthropic-specific features (only for Anthropic models) + +### Streaming with Native Anthropic API + +The native Anthropic API also supports streaming responses: + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 5." 
+ } + ], + "max_tokens": 100, + "stream": true + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +## Advanced Features with Anthropic Models + +Since the gateway supports the native Anthropic API, you have full access to Anthropic-specific features: + +### Extended Thinking + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "Solve this puzzle: A farmer needs to cross a river with a fox, chicken, and bag of grain. The boat can only hold the farmer and one item. How does the farmer get everything across safely?" + } + ], + "max_tokens": 8000, + "thinking": { + "type": "enabled", + "budget_tokens": 5000 + } + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +### Prompt Caching + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "system": [ + { + "type": "text", + "text": "You are an AI assistant specialized in Python programming. You help users write clean, efficient Python code.", + "cache_control": {"type": "ephemeral"} + } + ], + "messages": [ + { + "role": "user", + "content": "Write a function to calculate fibonacci numbers." + } + ], + "max_tokens": 500 + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + +### Tool Use (Function Calling) + +```shell +curl -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "messages": [ + { + "role": "user", + "content": "What is the weather in San Francisco?" + } + ], + "max_tokens": 500, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + } + }, + "required": ["location"] + } + } + ] + }' \ + $GATEWAY_URL/anthropic/v1/messages +``` + [AIGatewayRouteRule]: ../../api/api.mdx#aigatewayrouterule [model ID]: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html [Claude 3 Sonnet]: https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table diff --git a/site/docs/getting-started/connect-providers/index.md b/site/docs/getting-started/connect-providers/index.md index 2137c9ad63..fb23dc4abd 100644 --- a/site/docs/getting-started/connect-providers/index.md +++ b/site/docs/getting-started/connect-providers/index.md @@ -44,3 +44,4 @@ Choose your provider to get started: - [Connect OpenAI](./openai.md) - [Connect AWS Bedrock](./aws-bedrock.md) - [Connect Azure OpenAI](./azure-openai.md) +- [Connect GCP VertexAI](./gcp-vertexai.md) diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml index ffe1af0481..29e2674c4b 100644 --- a/tests/extproc/envoy.yaml +++ b/tests/extproc/envoy.yaml @@ -58,6 +58,24 @@ static_resources: route: auto_host_rewrite: true cluster: aws_bedrock + - match: + prefix: "/" + headers: + - name: x-ai-eg-model + string_match: + exact: claude-sonnet-4-5 + route: + auto_host_rewrite: true + cluster: anthropic + - match: + prefix: "/" + headers: + - name: x-ai-eg-model + string_match: + exact: global.anthropic.claude-sonnet-4-5-20250929-v1:0 + route: + auto_host_rewrite: true + cluster: anthropic_aws_bedrock - match: prefix: "/" headers: @@ -222,6 +240,14 @@ static_resources: exact: gcp-anthropicai route: cluster: testupstream-gcp-anthropicai + - match: + prefix: "/" + headers: + - name: x-test-backend + string_match: + exact: aws-anthropic + route: + cluster: testupstream-aws-anthropic - match: prefix: "/" headers: @@ -720,6 +746,65 @@ static_resources: filter_metadata: aigateway.envoy.io: per_route_rule_backend_name: "aws-bedrock" + - name: anthropic_aws_bedrock + connect_timeout: 30s + type: STRICT_DNS + outlier_detection: + 
consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. + max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + allow_mode_override: true + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: anthropic_aws_bedrock + endpoints: + - lb_endpoints: + - endpoint: + hostname: bedrock-runtime.us-east-1.amazonaws.com + address: + socket_address: + address: bedrock-runtime.us-east-1.amazonaws.com + port_value: 443 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "anthropic-aws-bedrock" transport_socket: name: envoy.transport_sockets.tls 
typed_config: @@ -843,6 +928,65 @@ static_resources: filter_metadata: aigateway.envoy.io: per_route_rule_backend_name: "testupstream-gcp-anthropicai" + - name: testupstream-aws-anthropic + connect_timeout: 0.25s + type: STATIC + lb_policy: ROUND_ROBIN + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. + max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: testupstream-aws-anthropic + endpoints: + - priority: 0 + lb_endpoints: + - endpoint: + address: + socket_address: + address: 
127.0.0.1 + port_value: 8080 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "testupstream-aws-anthropic" - name: openai connect_timeout: 30s type: STRICT_DNS @@ -918,6 +1062,69 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext auto_host_sni: true + - name: anthropic + connect_timeout: 30s + type: STRICT_DNS + outlier_detection: + consecutive_5xx: 1 + interval: 1s + base_ejection_time: 2s # Must be smaller than the require.Eventually's interval. Otherwise, the tests may pass without going through the fallback since the always-failing backend could be ejected by the time when require.Eventually retries due to the previous request IF the retry is not configured. + max_ejection_percent: 100 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http_protocol_options: {} + http_filters: + - name: upstream_extproc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + request_attributes: + - xds.upstream_host_metadata + processing_mode: + request_header_mode: "SEND" + request_body_mode: "NONE" + response_header_mode: "SKIP" + response_body_mode: "NONE" + grpc_service: + envoy_grpc: + cluster_name: extproc_cluster + metadataOptions: + receivingNamespaces: + untyped: + - io.envoy.ai_gateway + - name: envoy.filters.http.header_mutation + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.header_mutation.v3.HeaderMutation + mutations: + request_mutations: + - append: + append_action: ADD_IF_ABSENT + header: + key: content-length + value: "%DYNAMIC_METADATA(io.envoy.ai_gateway:content_length)%" + - name: envoy.filters.http.upstream_codec + typed_config: + "@type": 
type.googleapis.com/envoy.extensions.filters.http.upstream_codec.v3.UpstreamCodec + load_assignment: + cluster_name: anthropic + endpoints: + - lb_endpoints: + - endpoint: + hostname: api.anthropic.com + address: + socket_address: + address: api.anthropic.com + port_value: 443 + metadata: + filter_metadata: + aigateway.envoy.io: + per_route_rule_backend_name: "anthropic" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + auto_host_sni: true - name: azure_openai connect_timeout: 30s type: STRICT_DNS diff --git a/tests/extproc/extproc_test.go b/tests/extproc/extproc_test.go index 5992b65565..3979e225f5 100644 --- a/tests/extproc/extproc_test.go +++ b/tests/extproc/extproc_test.go @@ -36,6 +36,7 @@ const ( var ( openAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} awsBedrockSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSBedrock} + awsAnthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAWSAnthropic, Version: "bedrock-2023-05-31"} azureOpenAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAzureOpenAI, Version: "2025-01-01-preview"} gcpVertexAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPVertexAI} gcpAnthropicAISchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaGCPAnthropic, Version: "vertex-2023-10-16"} @@ -44,6 +45,7 @@ var ( grokSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} sambaNovaSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1"} deepInfraSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI, Version: "v1/openai"} + anthropicSchema = filterapi.VersionedAPISchema{Name: filterapi.APISchemaAnthropic} testUpstreamOpenAIBackend = filterapi.Backend{Name: "testupstream-openai", Schema: openAISchema} testUpstreamModelNameOverride = 
filterapi.Backend{Name: "testupstream-modelname-override", ModelNameOverride: "override-model", Schema: openAISchema} @@ -59,7 +61,8 @@ var ( Region: "gcp-region", ProjectName: "gcp-project-name", }}} - alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} + testUpstreamAWSAnthropicBackend = filterapi.Backend{Name: "testupstream-aws-anthropic", Schema: awsAnthropicSchema} + alwaysFailingBackend = filterapi.Backend{Name: "always-failing-backend", Schema: openAISchema} // envoyConfig is the embedded Envoy configuration template. // diff --git a/tests/extproc/real_providers_test.go b/tests/extproc/real_providers_test.go index fc51add5a4..7c3fd4b37c 100644 --- a/tests/extproc/real_providers_test.go +++ b/tests/extproc/real_providers_test.go @@ -14,6 +14,8 @@ import ( "testing" "time" + "github.com/anthropics/anthropic-sdk-go" + anthropicoption "github.com/anthropics/anthropic-sdk-go/option" "github.com/openai/openai-go" "github.com/openai/openai-go/option" "github.com/stretchr/testify/assert" @@ -46,10 +48,17 @@ func TestWithRealProviders(t *testing.T) { {Name: "openai", Schema: openAISchema, Auth: &filterapi.BackendAuth{ APIKey: &filterapi.APIKeyAuth{Key: cc.OpenAIAPIKey}, }}, + {Name: "anthropic", Schema: anthropicSchema, Auth: &filterapi.BackendAuth{ + AnthropicAPIKey: &filterapi.AnthropicAPIKeyAuth{Key: cc.AnthropicAPIKey}, + }}, {Name: "aws-bedrock", Schema: awsBedrockSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ CredentialFileLiteral: cc.AWSFileLiteral, Region: "us-east-1", }}}, + {Name: "anthropic-aws-bedrock", Schema: awsAnthropicSchema, Auth: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ + CredentialFileLiteral: cc.AWSFileLiteral, + Region: "us-east-1", + }}}, {Name: "azure-openai", Schema: azureOpenAISchema, Auth: &filterapi.BackendAuth{ AzureAuth: &filterapi.AzureAuth{AccessToken: cc.AzureAccessToken}, }}, @@ -119,6 +128,17 @@ func TestWithRealProviders(t *testing.T) { }) } }) + 
t.Run("messages", func(t *testing.T) { + for _, tc := range []realProvidersTestCase{ + {name: "anthropic", modelName: "claude-sonnet-4-5", required: internaltesting.RequiredCredentialAnthropic}, + {name: "anthropic-aws-bedrock", modelName: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", required: internaltesting.RequiredCredentialAWS}, + } { + t.Run(tc.name, func(t *testing.T) { + cc.MaybeSkip(t, tc.required) + requireEventuallyMessagesNonStreamingRequestOK(t, listenerAddress, tc.modelName) + }) + } + }) }) // Read all access logs and check if the used token is logged. @@ -362,6 +382,33 @@ func requireEventuallyChatCompletionNonStreamingRequestOK(t *testing.T, listener }, realProvidersEventuallyTimeout, realProvidersEventuallyInterval) } +func requireEventuallyMessagesNonStreamingRequestOK(t *testing.T, listenerAddress, modelName string) { + client := anthropic.NewClient( + anthropicoption.WithAPIKey("dummy"), + anthropicoption.WithBaseURL(listenerAddress+"/anthropic/"), + ) + internaltesting.RequireEventuallyNoError(t, func() error { + message, err := client.Messages.New(t.Context(), anthropic.MessageNewParams{ + MaxTokens: 1024, + Messages: []anthropic.MessageParam{ + anthropic.NewUserMessage(anthropic.NewTextBlock("Say hi!")), + }, + Model: anthropic.Model(modelName), + }) + if err != nil { + t.Logf("messages error: %v", err) + return fmt.Errorf("messages error: %w", err) + } + + if len(message.Content) == 0 { + return fmt.Errorf("empty message content in response") + } + + t.Logf("response: %+v", message.Content) + return nil + }, realProvidersEventuallyTimeout, realProvidersEventuallyInterval) +} + func requireEventuallyEmbeddingsRequestOK(t *testing.T, listenerAddress, modelName string) { client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/")) require.Eventually(t, func() bool { diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go index 550ab7d579..0b57520bc6 100644 --- a/tests/extproc/testupstream_test.go 
+++ b/tests/extproc/testupstream_test.go @@ -61,6 +61,7 @@ func TestWithTestUpstream(t *testing.T) { testUpstreamAzureBackend, testUpstreamGCPVertexAIBackend, testUpstreamGCPAnthropicAIBackend, + testUpstreamAWSAnthropicBackend, { Name: "testupstream-openai-5xx", Schema: openAISchema, HeaderMutation: &filterapi.HTTPHeaderMutation{ Set: []filterapi.HTTPHeader{{Name: testupstreamlib.ResponseStatusKey, Value: "500"}}, @@ -892,7 +893,7 @@ data: {"type": "message_stop"} method: http.MethodPost, expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"}, requestBody: `{ - "model": "claude-sonnet-4-5", + "model": "foo", "max_tokens": 1000, "messages": [ { @@ -902,7 +903,7 @@ data: {"type": "message_stop"} ] }`, expPath: "/v1/messages", - responseBody: `{"model":"claude-sonnet-4-5-20250929","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`, + responseBody: `{"model":"foo","id":"msg_01J5gW6Sffiem6avXSAooZZw","type":"message","role":"assistant","content":[{"type":"text","text":"Hi! 
👋 How can I help you today?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":16,"service_tier":"standard"}}`, expStatus: http.StatusOK, }, { @@ -913,7 +914,7 @@ data: {"type": "message_stop"} expRequestHeaders: map[string]string{"x-api-key": "anthropic-api-key"}, responseType: "sse", requestBody: `{ - "model": "claude-sonnet-4-5", + "model": "foo", "max_tokens": 1000, "messages": [ { @@ -925,7 +926,7 @@ data: {"type": "message_stop"} expPath: "/v1/messages", responseBody: ` event: message_start -data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } +data: {"type":"message_start","message":{"model":"foo","id":"msg_01BfvfMsg2gBzwsk6PZRLtDg","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } event: content_block_start data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } @@ -953,6 +954,87 @@ data: {"type":"message_stop" } `, expStatus: http.StatusOK, }, + { + name: "aws-anthropic - /anthropic/v1/messages", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from 
AWS!"}]}],"stream":false}`, + expRequestBody: `{"max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Hello from AWS!"}]}],"stream":false,"anthropic_version":"bedrock-2023-05-31"}`, + expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, + expStatus: http.StatusOK, + expResponseBody: `{"id":"msg_aws_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from AWS Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":20}}`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - streaming", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + responseType: "sse", + requestBody: `{"model":"anthropic.claude-3-haiku-20240307-v1:0","max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true}`, + expRequestBody: `{"max_tokens":150,"messages":[{"role":"user","content":[{"type":"text","text":"Tell me a joke"}]}],"stream":true,"anthropic_version":"bedrock-2023-05-31"}`, + expPath: "/model/anthropic.claude-3-haiku-20240307-v1:0/invoke-stream", + responseStatus: strconv.Itoa(http.StatusOK), + responseBody: `event: message_start +data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}} + +event: content_block_stop +data: 
{"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}} + +event: message_stop +data: {"type":"message_stop"} + +`, + expStatus: http.StatusOK, + expResponseBody: `event: message_start +data: {"type":"message_start","message":{"id":"msg_aws_456","usage":{"input_tokens":12}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Why did the"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" chicken cross the road?"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":18}} + +event: message_stop +data: {"type":"message_stop"} + +`, + }, + { + name: "aws-anthropic - /anthropic/v1/messages - error response", + backend: "aws-anthropic", + path: "/anthropic/v1/messages", + method: http.MethodPost, + requestBody: `{"model":"anthropic.claude-3-sonnet-20240229-v1:0","max_tokens":100,"messages":[{"role":"user","content":[{"type":"text","text":"Test error"}]}]}`, + expPath: "/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + responseStatus: strconv.Itoa(http.StatusBadRequest), + responseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`, + expStatus: http.StatusBadRequest, + expResponseBody: `{"type":"error","error":{"type":"validation_error","message":"Invalid request format"}}`, + }, } { t.Run(tc.name, func(t *testing.T) { listenerAddress := fmt.Sprintf("http://localhost:%d", listenerPort)