diff --git a/README.md b/README.md index 8195432..5d319cb 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ try { }, ], }, - Provider.OpenAI + Provider.openai ); // Provider is optional console.log('Response:', response.choices[0].message.content); @@ -159,7 +159,7 @@ try { onFinish: () => console.log('\nStream completed'), onError: (error) => console.error('Stream error:', error), }, - Provider.Groq // Provider is optional + Provider.groq // Provider is optional ); } catch (error) { console.error('Error:', error); @@ -241,7 +241,7 @@ const client = new InferenceGatewayClient({ }); try { - const response = await client.proxy(Provider.OpenAI, 'embeddings', { + const response = await client.proxy(Provider.openai, 'embeddings', { method: 'POST', body: JSON.stringify({ model: 'text-embedding-ada-002', diff --git a/Taskfile.yml b/Taskfile.yml index 9fbe27a..2b7aa0a 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -5,7 +5,7 @@ tasks: oas-download: desc: Download OpenAPI specification cmds: - - curl -o openapi.yaml https://raw.githubusercontent.com/inference-gateway/inference-gateway/refs/heads/main/openapi.yaml + - curl -o openapi.yaml https://raw.githubusercontent.com/inference-gateway/schemas/refs/heads/main/openapi.yaml lint: desc: Lint the SDK diff --git a/examples/.env.example b/examples/.env.example index ae6a429..fed632d 100644 --- a/examples/.env.example +++ b/examples/.env.example @@ -1,8 +1,10 @@ # General settings -ENVIRONMENT=development -ENABLE_TELEMETRY=false -ENABLE_AUTH=false +ENVIRONMENT=production +ALLOWED_MODELS= +# Telemetry +TELEMETRY_ENABLE=false +TELEMETRY_METRICS_PORT=9464 # Model Context Protocol (MCP) MCP_ENABLE=false MCP_EXPOSE=false @@ -13,10 +15,35 @@ MCP_TLS_HANDSHAKE_TIMEOUT=3s MCP_RESPONSE_HEADER_TIMEOUT=3s MCP_EXPECT_CONTINUE_TIMEOUT=1s MCP_REQUEST_TIMEOUT=5s -# OpenID Connect -OIDC_ISSUER_URL=http://keycloak:8080/realms/inference-gateway-realm -OIDC_CLIENT_ID=inference-gateway-client -OIDC_CLIENT_SECRET= +MCP_MAX_RETRIES=3 +MCP_RETRY_INTERVAL=5s +MCP_INITIAL_BACKOFF=1s +MCP_ENABLE_RECONNECT=true +MCP_RECONNECT_INTERVAL=30s +MCP_POLLING_ENABLE=true +MCP_POLLING_INTERVAL=30s +MCP_POLLING_TIMEOUT=5s +MCP_DISABLE_HEALTHCHECK_LOGS=true +# Agent-to-Agent (A2A) Protocol +A2A_ENABLE=false +A2A_EXPOSE=false +A2A_AGENTS= +A2A_CLIENT_TIMEOUT=30s +A2A_POLLING_ENABLE=true +A2A_POLLING_INTERVAL=1s +A2A_POLLING_TIMEOUT=30s +A2A_MAX_POLL_ATTEMPTS=30 +A2A_MAX_RETRIES=3 +A2A_RETRY_INTERVAL=5s +A2A_INITIAL_BACKOFF=1s +A2A_ENABLE_RECONNECT=true +A2A_RECONNECT_INTERVAL=30s +A2A_DISABLE_HEALTHCHECK_LOGS=true +# Authentication +AUTH_ENABLE=false +AUTH_OIDC_ISSUER=http://keycloak:8080/realms/inference-gateway-realm +AUTH_OIDC_CLIENT_ID=inference-gateway-client +AUTH_OIDC_CLIENT_SECRET= # Server settings SERVER_HOST=0.0.0.0 SERVER_PORT=8080 @@ -31,6 +58,9 @@ CLIENT_MAX_IDLE_CONNS=20 CLIENT_MAX_IDLE_CONNS_PER_HOST=20 CLIENT_IDLE_CONN_TIMEOUT=30s CLIENT_TLS_MIN_VERSION=TLS12 +CLIENT_DISABLE_COMPRESSION=true +CLIENT_RESPONSE_HEADER_TIMEOUT=10s +CLIENT_EXPECT_CONTINUE_TIMEOUT=1s # Providers ANTHROPIC_API_URL=https://api.anthropic.com/v1 ANTHROPIC_API_KEY= @@ -46,3 +76,5 @@ OPENAI_API_URL=https://api.openai.com/v1 OPENAI_API_KEY= DEEPSEEK_API_URL=https://api.deepseek.com DEEPSEEK_API_KEY= +GOOGLE_API_URL=https://generativelanguage.googleapis.com/v1beta/openai +GOOGLE_API_KEY= \ No newline at end of file diff --git a/openapi.yaml b/openapi.yaml index 6bcde18..317a74c 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -29,8 +29,10 @@ tags: description: List and describe the various models available in the API. - name: Completions description: Generate completions from the models. - - name: Tools + - name: MCP description: List and manage MCP tools. + - name: A2A + description: List and manage A2A agents. - name: Proxy description: Proxy requests to provider endpoints. - name: Health @@ -158,7 +160,7 @@ paths: get: operationId: listTools tags: - - Tools + - MCP description: | Lists the currently available MCP tools. Only accessible when EXPOSE_MCP is enabled. summary: Lists the currently available MCP tools @@ -177,6 +179,65 @@ paths: $ref: '#/components/responses/MCPNotExposed' '500': $ref: '#/components/responses/InternalError' + /a2a/agents: + get: + operationId: listAgents + tags: + - A2A + description: | + Lists the currently available A2A agents. Only accessible when EXPOSE_A2A is enabled. + summary: Lists the currently available A2A agents + security: + - bearerAuth: [] + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '#/components/schemas/ListAgentsResponse' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/A2ANotExposed' + '500': + $ref: '#/components/responses/InternalError' + /a2a/agents/{id}: + get: + operationId: getAgent + tags: + - A2A + description: | + Gets a specific A2A agent by its unique identifier. Only accessible when EXPOSE_A2A is enabled. + summary: Gets a specific A2A agent by ID + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: The unique identifier of the agent + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '#/components/schemas/A2AAgentCard' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/A2ANotExposed' + '404': + description: Agent not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '500': + $ref: '#/components/responses/InternalError' /proxy/{provider}/{path}: parameters: - name: provider @@ -390,6 +451,14 @@ components: $ref: '#/components/schemas/Error' example: error: 'MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable.' + A2ANotExposed: + description: A2A agents endpoint is not exposed + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + error: 'A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable.' ProviderResponse: description: | ProviderResponse depends on the specific provider and endpoint being called @@ -427,7 +496,7 @@ components: bearerFormat: JWT description: | Authentication is optional by default. - To enable authentication, set ENABLE_AUTH to true. + To enable authentication, set AUTH_ENABLE to true. When enabled, requests must include a valid JWT token in the Authorization header. schemas: Provider: @@ -440,6 +509,7 @@ components: - cohere - anthropic - deepseek + - google x-provider-configs: ollama: id: 'ollama' @@ -457,7 +527,7 @@ components: anthropic: id: 'anthropic' url: 'https://api.anthropic.com/v1' - auth_type: 'bearer' + auth_type: 'xheader' endpoints: models: name: 'list_models' @@ -532,6 +602,19 @@ components: name: 'chat_completions' method: 'POST' endpoint: '/chat/completions' + google: + id: 'google' + url: 'https://generativelanguage.googleapis.com/v1beta/openai' + auth_type: 'bearer' + endpoints: + models: + name: 'list_models' + method: 'GET' + endpoint: '/models' + chat: + name: 'chat_completions' + method: 'POST' + endpoint: '/chat/completions' ProviderSpecificResponse: type: object description: | @@ -696,6 +779,103 @@ components: required: - object - data + ListAgentsResponse: + type: object + description: Response structure for listing A2A agents + properties: + object: + type: string + description: Always "list" + example: 'list' + data: + type: array + items: + $ref: '#/components/schemas/A2AAgentCard' + default: [] + description: Array of available A2A agents + required: + - object + - data + A2AAgentCard: + description: |- + An AgentCard conveys key information: + - Overall details (version, name, description, uses) + - Skills: A set of capabilities the agent can perform + - Default modalities/content types supported by the agent. + - Authentication requirements + properties: + capabilities: + additionalProperties: true + description: Optional capabilities supported by the agent. + defaultInputModes: + description: |- + The set of interaction modes that the agent supports across all skills. This can be overridden per-skill. + Supported media types for input. + items: + type: string + type: array + defaultOutputModes: + description: Supported media types for output. + items: + type: string + type: array + description: + description: |- + A human-readable description of the agent. Used to assist users and + other agents in understanding what the agent can do. + type: string + documentationUrl: + description: A URL to documentation for the agent. + type: string + iconUrl: + description: A URL to an icon for the agent. + type: string + id: + description: Unique identifier for the agent (base64-encoded SHA256 hash of the agent URL). + type: string + name: + description: Human readable name of the agent. + type: string + provider: + additionalProperties: true + description: The service provider of the agent + security: + description: Security requirements for contacting the agent. + items: + additionalProperties: true + type: object + type: array + securitySchemes: + additionalProperties: true + description: Security scheme details used for authenticating with this agent. + type: object + skills: + description: Skills are a unit of capability that an agent can perform. + items: + additionalProperties: true + type: array + supportsAuthenticatedExtendedCard: + description: |- + true if the agent supports providing an extended agent card when the user is authenticated. + Defaults to false if not specified. + type: boolean + url: + description: A URL to the address the agent is hosted at. + type: string + version: + description: The version of the agent - format is up to the provider. + type: string + required: + - capabilities + - defaultInputModes + - defaultOutputModes + - description + - id + - name + - skills + - url + - version + type: object MCPTool: type: object description: An MCP tool definition @@ -1035,19 +1215,7 @@ components: type: string description: The type of the tool. Currently, only `function` is supported. function: - type: object - properties: - name: - type: string - description: The name of the function to call. - arguments: - type: string - description: - The arguments to call the function with, as generated by the model - in JSON format. Note that the model does not always generate - valid JSON, and may hallucinate parameters not defined by your - function schema. Validate the arguments in your code before - calling your function. + $ref: '#/components/schemas/ChatCompletionMessageToolCallFunction' required: - index ChatCompletionTokenLogprob: @@ -1179,16 +1347,24 @@ components: type: string default: 'production' description: 'The environment' - - name: enable_telemetry - env: 'ENABLE_TELEMETRY' + - name: allowed_models + env: 'ALLOWED_MODELS' + type: string + default: '' + description: 'Comma-separated list of models to allow. If empty, all models will be available' + - telemetry: + title: 'Telemetry' + settings: + - name: telemetry_enable + env: 'TELEMETRY_ENABLE' type: bool default: 'false' description: 'Enable telemetry' - - name: enable_auth - env: 'ENABLE_AUTH' - type: bool - default: 'false' - description: 'Enable authentication' + - name: telemetry_metrics_port + env: 'TELEMETRY_METRICS_PORT' + type: string + default: '9464' + description: 'Port for telemetry metrics server' - mcp: title: 'Model Context Protocol (MCP)' settings: @@ -1236,22 +1412,144 @@ components: type: time.Duration default: '5s' description: 'MCP client request timeout for initialize and tool calls' - - oidc: - title: 'OpenID Connect' + - name: mcp_max_retries + env: 'MCP_MAX_RETRIES' + type: int + default: '3' + description: 'Maximum number of connection retry attempts' + - name: mcp_retry_interval + env: 'MCP_RETRY_INTERVAL' + type: time.Duration + default: '5s' + description: 'Interval between connection retry attempts' + - name: mcp_initial_backoff + env: 'MCP_INITIAL_BACKOFF' + type: time.Duration + default: '1s' + description: 'Initial backoff duration for exponential backoff retry' + - name: mcp_enable_reconnect + env: 'MCP_ENABLE_RECONNECT' + type: bool + default: 'true' + description: 'Enable automatic reconnection for failed servers' + - name: mcp_reconnect_interval + env: 'MCP_RECONNECT_INTERVAL' + type: time.Duration + default: '30s' + description: 'Interval between reconnection attempts' + - name: mcp_polling_enable + env: 'MCP_POLLING_ENABLE' + type: bool + default: 'true' + description: 'Enable health check polling' + - name: mcp_polling_interval + env: 'MCP_POLLING_INTERVAL' + type: time.Duration + default: '30s' + description: 'Interval between health check polling requests' + - name: mcp_polling_timeout + env: 'MCP_POLLING_TIMEOUT' + type: time.Duration + default: '5s' + description: 'Timeout for individual health check requests' + - name: mcp_disable_healthcheck_logs + env: 'MCP_DISABLE_HEALTHCHECK_LOGS' + type: bool + default: 'true' + description: 'Disable health check log messages to reduce noise' + - a2a: + title: 'Agent-to-Agent (A2A) Protocol' + settings: + - name: a2a_enable + env: 'A2A_ENABLE' + type: bool + default: 'false' + description: 'Enable A2A protocol support' + - name: a2a_expose + env: 'A2A_EXPOSE' + type: bool + default: 'false' + description: 'Expose A2A agents list cards endpoint' + - name: a2a_agents + env: 'A2A_AGENTS' + type: string + description: 'Comma-separated list of A2A agent URLs' + - name: a2a_client_timeout + env: 'A2A_CLIENT_TIMEOUT' + type: time.Duration + default: '30s' + description: 'A2A client timeout' + - name: a2a_polling_enable + env: 'A2A_POLLING_ENABLE' + type: bool + default: 'true' + description: 'Enable task status polling' + - name: a2a_polling_interval + env: 'A2A_POLLING_INTERVAL' + type: time.Duration + default: '1s' + description: 'Interval between polling requests' + - name: a2a_polling_timeout + env: 'A2A_POLLING_TIMEOUT' + type: time.Duration + default: '30s' + description: 'Maximum time to wait for task completion' + - name: a2a_max_poll_attempts + env: 'A2A_MAX_POLL_ATTEMPTS' + type: int + default: '30' + description: 'Maximum number of polling attempts' + - name: a2a_max_retries + env: 'A2A_MAX_RETRIES' + type: int + default: '3' + description: 'Maximum number of connection retry attempts' + - name: a2a_retry_interval + env: 'A2A_RETRY_INTERVAL' + type: time.Duration + default: '5s' + description: 'Interval between connection retry attempts' + - name: a2a_initial_backoff + env: 'A2A_INITIAL_BACKOFF' + type: time.Duration + default: '1s' + description: 'Initial backoff duration for exponential backoff retry' + - name: a2a_enable_reconnect + env: 'A2A_ENABLE_RECONNECT' + type: bool + default: 'true' + description: 'Enable automatic reconnection for failed agents' + - name: a2a_reconnect_interval + env: 'A2A_RECONNECT_INTERVAL' + type: time.Duration + default: '30s' + description: 'Interval between reconnection attempts' + - name: a2a_disable_healthcheck_logs + env: 'A2A_DISABLE_HEALTHCHECK_LOGS' + type: bool + default: 'true' + description: 'Disable health check log messages to reduce noise' + - auth: + title: 'Authentication' settings: - - name: issuer_url - env: 'OIDC_ISSUER_URL' + - name: auth_enable + env: 'AUTH_ENABLE' + type: bool + default: 'false' + description: 'Enable authentication' + - name: auth_oidc_issuer + env: 'AUTH_OIDC_ISSUER' type: string default: 'http://keycloak:8080/realms/inference-gateway-realm' description: 'OIDC issuer URL' - - name: client_id - env: 'OIDC_CLIENT_ID' + - name: auth_oidc_client_id + env: 'AUTH_OIDC_CLIENT_ID' type: string default: 'inference-gateway-client' description: 'OIDC client ID' secret: true - - name: client_secret - env: 'OIDC_CLIENT_SECRET' + - name: auth_oidc_client_secret + env: 'AUTH_OIDC_CLIENT_SECRET' type: string description: 'OIDC client secret' secret: true @@ -1319,6 +1617,21 @@ components: type: string default: 'TLS12' description: 'Minimum TLS version' + - name: disable_compression + env: 'CLIENT_DISABLE_COMPRESSION' + type: bool + default: 'true' + description: 'Disable compression for faster streaming' + - name: response_header_timeout + env: 'CLIENT_RESPONSE_HEADER_TIMEOUT' + type: time.Duration + default: '10s' + description: 'Response header timeout' + - name: expect_continue_timeout + env: 'CLIENT_EXPECT_CONTINUE_TIMEOUT' + type: time.Duration + default: '1s' + description: 'Expect continue timeout' - providers: title: 'Providers' settings: @@ -1392,3 +1705,13 @@ components: type: string description: 'DeepSeek API Key' secret: true + - name: google_api_url + env: 'GOOGLE_API_URL' + type: string + default: 'https://generativelanguage.googleapis.com/v1beta/openai' + description: 'Google API URL' + - name: google_api_key + env: 'GOOGLE_API_KEY' + type: string + description: 'Google API Key' + secret: true diff --git a/src/types/generated/index.ts b/src/types/generated/index.ts index b3470a2..74209b7 100644 --- a/src/types/generated/index.ts +++ b/src/types/generated/index.ts @@ -69,6 +69,48 @@ export interface paths { patch?: never; trace?: never; }; + '/a2a/agents': { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Lists the currently available A2A agents + * @description Lists the currently available A2A agents. Only accessible when EXPOSE_A2A is enabled. + * + */ + get: operations['listAgents']; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + '/a2a/agents/{id}': { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Gets a specific A2A agent by ID + * @description Gets a specific A2A agent by its unique identifier. Only accessible when EXPOSE_A2A is enabled. + * + */ + get: operations['getAgent']; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; '/proxy/{provider}/{path}': { parameters: { query?: never; @@ -253,6 +295,69 @@ export interface components { */ data: components['schemas']['MCPTool'][]; }; + /** @description Response structure for listing A2A agents */ + ListAgentsResponse: { + /** + * @description Always "list" + * @example list + */ + object: string; + /** + * @description Array of available A2A agents + * @default [] + */ + data: components['schemas']['A2AAgentCard'][]; + }; + /** @description An AgentCard conveys key information: + * - Overall details (version, name, description, uses) + * - Skills: A set of capabilities the agent can perform + * - Default modalities/content types supported by the agent. + * - Authentication requirements */ + A2AAgentCard: { + /** @description Optional capabilities supported by the agent. */ + capabilities: { + [key: string]: unknown; + }; + /** @description The set of interaction modes that the agent supports across all skills. This can be overridden per-skill. + * Supported media types for input. */ + defaultInputModes: string[]; + /** @description Supported media types for output. */ + defaultOutputModes: string[]; + /** @description A human-readable description of the agent. Used to assist users and + * other agents in understanding what the agent can do. */ + description: string; + /** @description A URL to documentation for the agent. */ + documentationUrl?: string; + /** @description A URL to an icon for the agent. */ + iconUrl?: string; + /** @description Unique identifier for the agent (base64-encoded SHA256 hash of the agent URL). */ + id: string; + /** @description Human readable name of the agent. */ + name: string; + /** @description The service provider of the agent */ + provider?: { + [key: string]: unknown; + }; + /** @description Security requirements for contacting the agent. */ + security?: { + [key: string]: unknown; + }[]; + /** @description Security scheme details used for authenticating with this agent. */ + securitySchemes?: { + [key: string]: unknown; + }; + /** @description Skills are a unit of capability that an agent can perform. */ + skills: { + [key: string]: unknown; + }[]; + /** @description true if the agent supports providing an extended agent card when the user is authenticated. + * Defaults to false if not specified. */ + supportsAuthenticatedExtendedCard?: boolean; + /** @description A URL to the address the agent is hosted at. */ + url: string; + /** @description The version of the agent - format is up to the provider. */ + version: string; + }; /** @description An MCP tool definition */ MCPTool: { /** @@ -440,12 +545,7 @@ export interface components { id?: string; /** @description The type of the tool. Currently, only `function` is supported. */ type?: string; - function?: { - /** @description The name of the function to call. */ - name?: string; - /** @description The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. */ - arguments?: string; - }; + function?: components['schemas']['ChatCompletionMessageToolCallFunction']; }; ChatCompletionTokenLogprob: { /** @description The token. */ @@ -541,6 +641,18 @@ export interface components { 'application/json': components['schemas']['Error']; }; }; + /** @description A2A agents endpoint is not exposed */ + A2ANotExposed: { + headers: { + [name: string]: unknown; + }; + content: { + /** @example { + * "error": "A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable." + * } */ + 'application/json': components['schemas']['Error']; + }; + }; /** @description ProviderResponse depends on the specific provider and endpoint being called * If you decide to use this approach, please follow the provider-specific documentations. * */ @@ -600,6 +712,9 @@ export type SchemaListModelsResponse = components['schemas']['ListModelsResponse']; export type SchemaListToolsResponse = components['schemas']['ListToolsResponse']; +export type SchemaListAgentsResponse = + components['schemas']['ListAgentsResponse']; +export type SchemaA2AAgentCard = components['schemas']['A2AAgentCard']; export type SchemaMcpTool = components['schemas']['MCPTool']; export type SchemaFunctionObject = components['schemas']['FunctionObject']; export type SchemaChatCompletionTool = @@ -637,6 +752,7 @@ export type ResponseBadRequest = components['responses']['BadRequest']; export type ResponseUnauthorized = components['responses']['Unauthorized']; export type ResponseInternalError = components['responses']['InternalError']; export type ResponseMcpNotExposed = components['responses']['MCPNotExposed']; +export type ResponseA2ANotExposed = components['responses']['A2ANotExposed']; export type ResponseProviderResponse = components['responses']['ProviderResponse']; export type RequestBodyProviderRequest = @@ -720,6 +836,64 @@ export interface operations { 500: components['responses']['InternalError']; }; }; + listAgents: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + 'application/json': components['schemas']['ListAgentsResponse']; + }; + }; + 401: components['responses']['Unauthorized']; + 403: components['responses']['A2ANotExposed']; + 500: components['responses']['InternalError']; + }; + }; + getAgent: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The unique identifier of the agent */ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + 'application/json': components['schemas']['A2AAgentCard']; + }; + }; + 401: components['responses']['Unauthorized']; + 403: components['responses']['A2ANotExposed']; + /** @description Agent not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + 'application/json': components['schemas']['Error']; + }; + }; + 500: components['responses']['InternalError']; + }; + }; proxyGet: { parameters: { query?: never; @@ -842,6 +1016,7 @@ export enum Provider { cohere = 'cohere', anthropic = 'anthropic', deepseek = 'deepseek', + google = 'google', } export enum ProviderAuthType { bearer = 'bearer',