From 5f924aba8d570c606e3686eb1132c15d440ff54e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:05:35 +0000 Subject: [PATCH 01/37] move dataclass into models --- ldai/__init__.py | 25 ++++ ldai/client.py | 205 ++---------------------------- ldai/models.py | 197 ++++++++++++++++++++++++++++ ldai/testing/test_agents.py | 4 +- ldai/testing/test_model_config.py | 2 +- 5 files changed, 236 insertions(+), 197 deletions(-) create mode 100644 ldai/models.py diff --git a/ldai/__init__.py b/ldai/__init__.py index cb7e545..91b3a2d 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -1 +1,26 @@ __version__ = "0.10.1" # x-release-please-version + +# Export main client +from ldai.client import LDAIClient + +# Export models for convenience +from ldai.models import ( + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, + LDMessage, + ModelConfig, + ProviderConfig, +) + +__all__ = [ + 'LDAIClient', + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', + 'LDMessage', + 'ModelConfig', + 'ProviderConfig', +] diff --git a/ldai/client.py b/ldai/client.py index a8bd888..db2a6ad 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,205 +1,22 @@ -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import chevron from ldclient import Context from ldclient.client import LDClient +from ldai.models import ( + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, + LDAIAgents, + LDMessage, + ModelConfig, + ProviderConfig, +) from ldai.tracker import LDAIConfigTracker -@dataclass -class LDMessage: - role: Literal['system', 'user', 'assistant'] - content: str - - def to_dict(self) -> dict: - """ - Render the given message as a dictionary object. - """ - return { - 'role': self.role, - 'content': self.content, - } - - -class ModelConfig: - """ - Configuration related to the model. 
- """ - - def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): - """ - :param name: The name of the model. - :param parameters: Additional model-specific parameters. - :param custom: Additional customer provided data. - """ - self._name = name - self._parameters = parameters - self._custom = custom - - @property - def name(self) -> str: - """ - The name of the model. - """ - return self._name - - def get_parameter(self, key: str) -> Any: - """ - Retrieve model-specific parameters. - - Accessing a named, typed attribute (e.g. name) will result in the call - being delegated to the appropriate property. - """ - if key == 'name': - return self.name - - if self._parameters is None: - return None - - return self._parameters.get(key) - - def get_custom(self, key: str) -> Any: - """ - Retrieve customer provided data. - """ - if self._custom is None: - return None - - return self._custom.get(key) - - def to_dict(self) -> dict: - """ - Render the given model config as a dictionary object. - """ - return { - 'name': self._name, - 'parameters': self._parameters, - 'custom': self._custom, - } - - -class ProviderConfig: - """ - Configuration related to the provider. - """ - - def __init__(self, name: str): - self._name = name - - @property - def name(self) -> str: - """ - The name of the provider. - """ - return self._name - - def to_dict(self) -> dict: - """ - Render the given provider config as a dictionary object. - """ - return { - 'name': self._name, - } - - -@dataclass(frozen=True) -class AIConfig: - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - - def to_dict(self) -> dict: - """ - Render the given default values as an AIConfig-compatible dictionary object. 
- """ - return { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - - -@dataclass(frozen=True) -class LDAIAgent: - """ - Represents an AI agent configuration with instructions and model settings. - - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent as a dictionary object. - """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - - -@dataclass(frozen=True) -class LDAIAgentDefaults: - """ - Default values for AI agent configurations. - - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent defaults as a dictionary object. 
- """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - - -@dataclass -class LDAIAgentConfig: - """ - Configuration for individual agent in batch requests. - - Combines agent key with its specific default configuration and variables. - """ - key: str - default_value: LDAIAgentDefaults - variables: Optional[Dict[str, Any]] = None - - -# Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] - - class LDAIClient: """The LaunchDarkly AI SDK client object.""" diff --git a/ldai/models.py b/ldai/models.py new file mode 100644 index 0000000..4eef5a2 --- /dev/null +++ b/ldai/models.py @@ -0,0 +1,197 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Literal, Optional + +from ldai.tracker import LDAIConfigTracker + + +@dataclass +class LDMessage: + role: Literal['system', 'user', 'assistant'] + content: str + + def to_dict(self) -> dict: + """ + Render the given message as a dictionary object. + """ + return { + 'role': self.role, + 'content': self.content, + } + + +class ModelConfig: + """ + Configuration related to the model. + """ + + def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): + """ + :param name: The name of the model. + :param parameters: Additional model-specific parameters. + :param custom: Additional customer provided data. + """ + self._name = name + self._parameters = parameters + self._custom = custom + + @property + def name(self) -> str: + """ + The name of the model. + """ + return self._name + + def get_parameter(self, key: str) -> Any: + """ + Retrieve model-specific parameters. + + Accessing a named, typed attribute (e.g. 
name) will result in the call + being delegated to the appropriate property. + """ + if key == 'name': + return self.name + + if self._parameters is None: + return None + + return self._parameters.get(key) + + def get_custom(self, key: str) -> Any: + """ + Retrieve customer provided data. + """ + if self._custom is None: + return None + + return self._custom.get(key) + + def to_dict(self) -> dict: + """ + Render the given model config as a dictionary object. + """ + return { + 'name': self._name, + 'parameters': self._parameters, + 'custom': self._custom, + } + + +class ProviderConfig: + """ + Configuration related to the provider. + """ + + def __init__(self, name: str): + self._name = name + + @property + def name(self) -> str: + """ + The name of the provider. + """ + return self._name + + def to_dict(self) -> dict: + """ + Render the given provider config as a dictionary object. + """ + return { + 'name': self._name, + } + + +@dataclass(frozen=True) +class AIConfig: + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AIConfig-compatible dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +@dataclass(frozen=True) +class LDAIAgent: + """ + Represents an AI agent configuration with instructions and model settings. + + An agent is similar to an AIConfig but focuses on instructions rather than messages, + making it suitable for AI assistant/agent use cases. 
+ """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + instructions: Optional[str] = None + tracker: Optional[LDAIConfigTracker] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent as a dictionary object. + """ + result: Dict[str, Any] = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.instructions is not None: + result['instructions'] = self.instructions + return result + + +@dataclass(frozen=True) +class LDAIAgentDefaults: + """ + Default values for AI agent configurations. + + Similar to LDAIAgent but without tracker and with optional enabled field, + used as fallback values when agent configurations are not available. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + instructions: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent defaults as a dictionary object. + """ + result: Dict[str, Any] = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.instructions is not None: + result['instructions'] = self.instructions + return result + + +@dataclass +class LDAIAgentConfig: + """ + Configuration for individual agent in batch requests. + + Combines agent key with its specific default configuration and variables. 
+ """ + key: str + default_value: LDAIAgentDefaults + variables: Optional[Dict[str, Any]] = None + + +# Type alias for multiple agents +LDAIAgents = Dict[str, LDAIAgent] + diff --git a/ldai/testing/test_agents.py b/ldai/testing/test_agents.py index b2e80c0..755f2e5 100644 --- a/ldai/testing/test_agents.py +++ b/ldai/testing/test_agents.py @@ -2,8 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, - ModelConfig, ProviderConfig) +from ldai import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, ModelConfig, + ProviderConfig) @pytest.fixture diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index 1ffc033..b35389d 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,7 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import AIConfig, LDAIClient, LDMessage, ModelConfig @pytest.fixture From 951eda13bc01b0515ef6e1ce042f5647d5fbd43e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:22:22 +0000 Subject: [PATCH 02/37] create new config types completion, agent, and judges --- ldai/__init__.py | 28 +++++-- ldai/client.py | 200 +++++++++++++++++++++++++++++++++++++---------- ldai/models.py | 189 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 348 insertions(+), 69 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 91b3a2d..11369c9 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -5,10 +5,16 @@ # Export models for convenience from ldai.models import ( - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + 
Judge, + JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, @@ -16,10 +22,16 @@ __all__ = [ 'LDAIClient', - 'AIConfig', - 'LDAIAgent', - 'LDAIAgentConfig', - 'LDAIAgentDefaults', + 'AIAgentConfig', + 'AIAgentConfigDefault', + 'AIAgentConfigRequest', + 'AIAgents', + 'AICompletionConfig', + 'AICompletionConfigDefault', + 'AIJudgeConfig', + 'AIJudgeConfigDefault', + 'Judge', + 'JudgeConfiguration', 'LDMessage', 'ModelConfig', 'ProviderConfig', diff --git a/ldai/client.py b/ldai/client.py index db2a6ad..4f3cc9e 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -5,11 +5,16 @@ from ldclient.client import LDClient from ldai.models import ( - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, - LDAIAgents, + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + Judge, + JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, @@ -23,40 +28,103 @@ class LDAIClient: def __init__(self, client: LDClient): self._client = client + def completion_config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: + """ + Get the value of a completion configuration. + + :param key: The key of the completion configuration. + :param context: The context to evaluate the completion configuration in. + :param default_value: The default value of the completion configuration. + :param variables: Additional variables for the completion configuration. + :return: The completion configuration with a tracker used for gathering metrics. 
+ """ + self._client.track('$ld:ai:config:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) + + config = AICompletionConfig( + enabled=bool(enabled), + model=model, + messages=messages, + provider=provider, + tracker=tracker, + judge_configuration=judge_configuration, + ) + + return config + def config( self, key: str, context: Context, - default_value: AIConfig, + default_value: AICompletionConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[AIConfig, LDAIConfigTracker]: + ) -> AICompletionConfig: """ Get the value of a model configuration. + .. deprecated:: Use :meth:`completion_config` instead. This method will be removed in a future version. + :param key: The key of the model configuration. :param context: The context to evaluate the model configuration in. :param default_value: The default value of the model configuration. :param variables: Additional variables for the model configuration. :return: The value of the model configuration along with a tracker used for gathering metrics. """ - self._client.track('$ld:ai:config:function:single', context, key, 1) + return self.completion_config(key, context, default_value, variables) - model, provider, messages, instructions, tracker, enabled = self.__evaluate(key, context, default_value.to_dict(), variables) + def judge_config( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIJudgeConfig: + """ + Get the value of a judge configuration. + + :param key: The key of the judge configuration. + :param context: The context to evaluate the judge configuration in. + :param default_value: The default value of the judge configuration. + :param variables: Additional variables for the judge configuration. + :return: The judge configuration with a tracker used for gathering metrics. 
+ """ + self._client.track('$ld:ai:judge:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) - config = AIConfig( + # Extract evaluation_metric_keys from the variation + variation = self._client.variation(key, context, default_value.to_dict()) + evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) + + config = AIJudgeConfig( enabled=bool(enabled), + evaluation_metric_keys=evaluation_metric_keys, model=model, messages=messages, provider=provider, + tracker=tracker, ) - return config, tracker + return config - def agent( + def agent_config( self, - config: LDAIAgentConfig, + key: str, context: Context, - ) -> LDAIAgent: + default_value: AIAgentConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIAgentConfig: """ Retrieve a single AI Config agent. @@ -65,39 +133,58 @@ def agent( Example:: - agent = client.agent(LDAIAgentConfig( - key='research_agent', - default_value=LDAIAgentDefaults( + agent = client.agent_config( + 'research_agent', + context, + AIAgentConfigDefault( enabled=True, model=ModelConfig('gpt-4'), instructions="You are a research assistant specializing in {{topic}}." ), variables={'topic': 'climate change'} - ), context) + ) if agent.enabled: research_result = agent.instructions # Interpolated instructions agent.tracker.track_success() - :param config: The agent configuration to use. + :param key: The agent configuration key. :param context: The context to evaluate the agent configuration in. - :return: Configured LDAIAgent instance. + :param default_value: Default agent values. + :param variables: Variables for interpolation. + :return: Configured AIAgentConfig instance. 
""" # Track single agent usage self._client.track( "$ld:ai:agent:function:single", context, - config.key, + key, 1 ) - return self.__evaluate_agent(config.key, context, config.default_value, config.variables) + return self.__evaluate_agent(key, context, default_value, variables) - def agents( + def agent( self, - agent_configs: List[LDAIAgentConfig], + config: AIAgentConfigRequest, context: Context, - ) -> LDAIAgents: + ) -> AIAgentConfig: + """ + Retrieve a single AI Config agent. + + .. deprecated:: Use :meth:`agent_config` instead. This method will be removed in a future version. + + :param config: The agent configuration to use. + :param context: The context to evaluate the agent configuration in. + :return: Configured AIAgentConfig instance. + """ + return self.agent_config(config.key, context, config.default_value, config.variables) + + def agent_configs( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: """ Retrieve multiple AI agent configurations. @@ -107,18 +194,18 @@ def agents( Example:: - agents = client.agents([ - LDAIAgentConfig( + agents = client.agent_configs([ + AIAgentConfigRequest( key='research_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a research assistant.' ), variables={'topic': 'climate change'} ), - LDAIAgentConfig( + AIAgentConfigRequest( key='writing_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a writing assistant.' ), @@ -131,7 +218,7 @@ def agents( :param agent_configs: List of agent configurations to retrieve. :param context: The context to evaluate the agent configurations in. - :return: Dictionary mapping agent keys to their LDAIAgent configurations. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. 
""" # Track multiple agents usage agent_count = len(agent_configs) @@ -142,7 +229,7 @@ def agents( agent_count ) - result: LDAIAgents = {} + result: AIAgents = {} for config in agent_configs: agent = self.__evaluate_agent( @@ -155,13 +242,29 @@ def agents( return result + def agents( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: + """ + Retrieve multiple AI agent configurations. + + .. deprecated:: Use :meth:`agent_configs` instead. This method will be removed in a future version. + + :param agent_configs: List of agent configurations to retrieve. + :param context: The context to evaluate the agent configurations in. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. + """ + return self.agent_configs(agent_configs, context) + def __evaluate( self, key: str, context: Context, default_dict: Dict[str, Any], variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool]: + ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool, Optional[Any]]: """ Internal method to evaluate a configuration and extract components. 
@@ -228,15 +331,31 @@ def __evaluate( enabled = variation.get('_ldMeta', {}).get('enabled', False) - return model, provider_config, messages, instructions, tracker, enabled + # Extract judge configuration + judge_configuration = None + if 'judgeConfiguration' in variation and isinstance(variation['judgeConfiguration'], dict): + judge_config = variation['judgeConfiguration'] + if 'judges' in judge_config and isinstance(judge_config['judges'], list): + judges = [ + Judge( + key=judge['key'], + sampling_rate=judge['samplingRate'] + ) + for judge in judge_config['judges'] + if isinstance(judge, dict) and 'key' in judge and 'samplingRate' in judge + ] + if judges: + judge_configuration = JudgeConfiguration(judges=judges) + + return model, provider_config, messages, instructions, tracker, enabled, judge_configuration def __evaluate_agent( self, key: str, context: Context, - default_value: LDAIAgentDefaults, + default_value: AIAgentConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> LDAIAgent: + ) -> AIAgentConfig: """ Internal method to evaluate an agent configuration. @@ -244,21 +363,22 @@ def __evaluate_agent( :param context: The evaluation context. :param default_value: Default agent values. :param variables: Variables for interpolation. - :return: Configured LDAIAgent instance. + :return: Configured AIAgentConfig instance. 
""" - model, provider, messages, instructions, tracker, enabled = self.__evaluate( + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( key, context, default_value.to_dict(), variables ) # For agents, prioritize instructions over messages final_instructions = instructions if instructions is not None else default_value.instructions - return LDAIAgent( - enabled=bool(enabled) if enabled is not None else default_value.enabled, + return AIAgentConfig( + enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), model=model or default_value.model, provider=provider or default_value.provider, instructions=final_instructions, tracker=tracker, + judge_configuration=judge_configuration or default_value.judge_configuration, ) def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str: diff --git a/ldai/models.py b/ldai/models.py index 4eef5a2..83b5326 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -100,18 +100,64 @@ def to_dict(self) -> dict: } +# ============================================================================ +# Judge Types +# ============================================================================ + +@dataclass(frozen=True) +class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + +@dataclass(frozen=True) +class JudgeConfiguration: + """ + Configuration for judge attachment to AI Configs. + """ + judges: List[Judge] + + def to_dict(self) -> dict: + """ + Render the judge configuration as a dictionary object. 
+ """ + return { + 'judges': [judge.to_dict() for judge in self.judges], + } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + @dataclass(frozen=True) -class AIConfig: +class AICompletionConfigDefault: + """ + Default Completion AI Config (default mode). + """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> dict: """ - Render the given default values as an AIConfig-compatible dictionary object. + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. """ - return { + result = { '_ldMeta': { 'enabled': self.enabled or False, }, @@ -119,25 +165,59 @@ def to_dict(self) -> dict: 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result @dataclass(frozen=True) -class LDAIAgent: +class AICompletionConfig: + """ + Completion AI Config (default mode). """ - Represents an AI agent configuration with instructions and model settings. + enabled: bool + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. 
+ """ + result = { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. +# ============================================================================ +# Agent Config Types +# ============================================================================ + + +@dataclass(frozen=True) +class AIAgentConfigDefault: + """ + Default Agent-specific AI Config with instructions. """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ - Render the given agent as a dictionary object. + Render the given agent config default as a dictionary object. """ result: Dict[str, Any] = { '_ldMeta': { @@ -148,50 +228,117 @@ def to_dict(self) -> Dict[str, Any]: } if self.instructions is not None: result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() return result @dataclass(frozen=True) -class LDAIAgentDefaults: +class AIAgentConfig: """ - Default values for AI agent configurations. - - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. + Agent-specific AI Config with instructions. 
""" - enabled: Optional[bool] = None + enabled: bool model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None instructions: Optional[str] = None + tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ - Render the given agent defaults as a dictionary object. + Render the given agent config as a dictionary object. """ result: Dict[str, Any] = { '_ldMeta': { - 'enabled': self.enabled or False, + 'enabled': self.enabled, }, 'model': self.model.to_dict() if self.model else None, 'provider': self.provider.to_dict() if self.provider else None, } if self.instructions is not None: result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() return result +# ============================================================================ +# Judge Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIJudgeConfigDefault: + """ + Default Judge-specific AI Config with required evaluation metric key. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + evaluation_metric_keys: Optional[List[str]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config default as a dictionary object. 
+ """ + result = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.evaluation_metric_keys is not None: + result['evaluationMetricKeys'] = self.evaluation_metric_keys + return result + + +@dataclass(frozen=True) +class AIJudgeConfig: + """ + Judge-specific AI Config with required evaluation metric key. + """ + enabled: bool + evaluation_metric_keys: List[str] + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + + def to_dict(self) -> dict: + """ + Render the given judge config as a dictionary object. + """ + result = { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'evaluationMetricKeys': self.evaluation_metric_keys, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + return result + + +# ============================================================================ +# Agent Request Config +# ============================================================================ + @dataclass -class LDAIAgentConfig: +class AIAgentConfigRequest: """ - Configuration for individual agent in batch requests. + Configuration for a single agent request. Combines agent key with its specific default configuration and variables. 
""" key: str - default_value: LDAIAgentDefaults + default_value: AIAgentConfigDefault variables: Optional[Dict[str, Any]] = None # Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] +AIAgents = Dict[str, AIAgentConfig] From ae7516be21a304942ddf0a65493eb54e8f8bc984 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:26:32 +0000 Subject: [PATCH 03/37] use inheritance for configs for consistency --- ldai/models.py | 131 +++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 70 deletions(-) diff --git a/ldai/models.py b/ldai/models.py index 83b5326..e8ddf21 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -139,61 +139,91 @@ def to_dict(self) -> dict: # ============================================================================ -# Completion Config Types +# Base AI Config Types # ============================================================================ @dataclass(frozen=True) -class AICompletionConfigDefault: +class AIConfigDefault: """ - Default Completion AI Config (default mode). + Base AI Config interface for default implementations with optional enabled property. """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None - judge_configuration: Optional[JudgeConfiguration] = None - def to_dict(self) -> dict: + def _base_to_dict(self) -> Dict[str, Any]: """ - Render the given default values as an AICompletionConfigDefault-compatible dictionary object. + Render the base config fields as a dictionary object. 
""" - result = { + return { '_ldMeta': { 'enabled': self.enabled or False, }, 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result @dataclass(frozen=True) -class AICompletionConfig: +class AIConfig: """ - Completion AI Config (default mode). + Base AI Config interface without mode-specific fields. """ enabled: bool model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None tracker: Optional[LDAIConfigTracker] = None - judge_configuration: Optional[JudgeConfiguration] = None - def to_dict(self) -> dict: + def _base_to_dict(self) -> Dict[str, Any]: """ - Render the given completion config as a dictionary object. + Render the base config fields as a dictionary object. """ - result = { + return { '_ldMeta': { 'enabled': self.enabled, }, 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AICompletionConfigDefault(AIConfigDefault): + """ + Default Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. 
+ """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AICompletionConfig(AIConfig): + """ + Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None if self.judge_configuration is not None: result['judgeConfiguration'] = self.judge_configuration.to_dict() return result @@ -203,15 +233,11 @@ def to_dict(self) -> dict: # Agent Config Types # ============================================================================ - @dataclass(frozen=True) -class AIAgentConfigDefault: +class AIAgentConfigDefault(AIConfigDefault): """ Default Agent-specific AI Config with instructions. """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None instructions: Optional[str] = None judge_configuration: Optional[JudgeConfiguration] = None @@ -219,13 +245,7 @@ def to_dict(self) -> Dict[str, Any]: """ Render the given agent config default as a dictionary object. 
""" - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() if self.instructions is not None: result['instructions'] = self.instructions if self.judge_configuration is not None: @@ -234,28 +254,18 @@ def to_dict(self) -> Dict[str, Any]: @dataclass(frozen=True) -class AIAgentConfig: +class AIAgentConfig(AIConfig): """ Agent-specific AI Config with instructions. """ - enabled: bool - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ Render the given agent config as a dictionary object. """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() if self.instructions is not None: result['instructions'] = self.instructions if self.judge_configuration is not None: @@ -268,58 +278,39 @@ def to_dict(self) -> Dict[str, Any]: # ============================================================================ @dataclass(frozen=True) -class AIJudgeConfigDefault: +class AIJudgeConfigDefault(AIConfigDefault): """ Default Judge-specific AI Config with required evaluation metric key. """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None evaluation_metric_keys: Optional[List[str]] = None def to_dict(self) -> dict: """ Render the given judge config default as a dictionary object. 
""" - result = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None if self.evaluation_metric_keys is not None: result['evaluationMetricKeys'] = self.evaluation_metric_keys return result @dataclass(frozen=True) -class AIJudgeConfig: +class AIJudgeConfig(AIConfig): """ Judge-specific AI Config with required evaluation metric key. """ - enabled: bool evaluation_metric_keys: List[str] - model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - tracker: Optional[LDAIConfigTracker] = None def to_dict(self) -> dict: """ Render the given judge config as a dictionary object. """ - result = { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'evaluationMetricKeys': self.evaluation_metric_keys, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() + result['evaluationMetricKeys'] = self.evaluation_metric_keys + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None return result From 0d933d2d9b0721339a77f2d656aaa50a74fb7d2a Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:26:51 +0000 Subject: [PATCH 04/37] added deprecations for old types --- ldai/__init__.py | 10 ++++++++++ ldai/models.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/ldai/__init__.py b/ldai/__init__.py index 11369c9..222c007 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -18,6 +18,11 @@ LDMessage, 
ModelConfig, ProviderConfig, + # Deprecated aliases for backward compatibility + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, ) __all__ = [ @@ -35,4 +40,9 @@ 'LDMessage', 'ModelConfig', 'ProviderConfig', + # Deprecated exports + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', ] diff --git a/ldai/models.py b/ldai/models.py index e8ddf21..f83964a 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,3 +1,4 @@ +import warnings from dataclasses import dataclass from typing import Any, Dict, List, Literal, Optional @@ -333,3 +334,27 @@ class AIAgentConfigRequest: # Type alias for multiple agents AIAgents = Dict[str, AIAgentConfig] + +# ============================================================================ +# Deprecated Type Aliases for Backward Compatibility +# ============================================================================ + +# Note: These are type aliases that point to the new types. +# Since Python uses duck typing, these will work at runtime even if type checkers complain. 
+# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault +# The old AIConfig return type had required enabled, so it maps to AICompletionConfig + +# Deprecated: Use AICompletionConfigDefault instead +# This was the old AIConfig with optional enabled (used as input/default) +# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled +AIConfig = AICompletionConfigDefault + +# Deprecated: Use AIAgentConfigDefault instead +LDAIAgentDefaults = AIAgentConfigDefault + +# Deprecated: Use AIAgentConfigRequest instead +LDAIAgentConfig = AIAgentConfigRequest + +# Deprecated: Use AIAgentConfig instead (note: this was the old return type) +LDAIAgent = AIAgentConfig + From 82718075cdbab1410729a52ba5f8950a866e5e76 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:35:34 +0000 Subject: [PATCH 05/37] create the ai provider interface and factory --- ldai/models.py | 5 +- ldai/providers/__init__.py | 11 ++ ldai/providers/ai_provider.py | 96 ++++++++++++++++ ldai/providers/ai_provider_factory.py | 154 ++++++++++++++++++++++++++ ldai/providers/types.py | 37 +++++++ 5 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 ldai/providers/__init__.py create mode 100644 ldai/providers/ai_provider.py create mode 100644 ldai/providers/ai_provider_factory.py create mode 100644 ldai/providers/types.py diff --git a/ldai/models.py b/ldai/models.py index f83964a..0b961f7 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,6 +1,6 @@ import warnings from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Dict, List, Literal, Optional, Union from ldai.tracker import LDAIConfigTracker @@ -334,6 +334,9 @@ class AIAgentConfigRequest: # Type alias for multiple agents AIAgents = Dict[str, AIAgentConfig] +# Type alias for all AI Config variants +AIConfigKind = Union[AIAgentConfig, AICompletionConfig, AIJudgeConfig] + # 
============================================================================ # Deprecated Type Aliases for Backward Compatibility diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py new file mode 100644 index 0000000..8cac547 --- /dev/null +++ b/ldai/providers/__init__.py @@ -0,0 +1,11 @@ +"""AI Provider interfaces and factory for LaunchDarkly AI SDK.""" + +from ldai.providers.ai_provider import AIProvider +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider + +__all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', +] + diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py new file mode 100644 index 0000000..5863a74 --- /dev/null +++ b/ldai/providers/ai_provider.py @@ -0,0 +1,96 @@ +"""Abstract base class for AI providers.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.types import ChatResponse, StructuredResponse + + +class AIProvider(ABC): + """ + Abstract base class for AI providers that implement chat model functionality. + + This class provides the contract that all provider implementations must follow + to integrate with LaunchDarkly's tracking and configuration capabilities. + + Following the AICHAT spec recommendation to use base classes with non-abstract methods + for better extensibility and backwards compatibility. + """ + + def __init__(self, logger: Optional[Any] = None): + """ + Initialize the AI provider. + + :param logger: Optional logger for logging provider operations. + """ + self.logger = logger + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the chat model with an array of messages. + + This method should convert messages to provider format, invoke the model, + and return a ChatResponse with the result and metrics. + + Default implementation takes no action and returns a placeholder response. 
+ Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + if self.logger: + self.logger.warn('invokeModel not implemented by this provider') + + from ldai.models import LDMessage + from ldai.providers.types import LDAIMetrics + + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the chat model with structured output support. + + This method should convert messages to provider format, invoke the model with + structured output configuration, and return a structured response. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + if self.logger: + self.logger.warn('invokeStructuredModel not implemented by this provider') + + from ldai.providers.types import LDAIMetrics + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics(success=False, usage=None), + ) + + @staticmethod + @abstractmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': + """ + Static method that constructs an instance of the provider. + + Each provider implementation must provide their own static create method + that accepts an AIConfigKind and returns a configured instance. 
+ + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured provider instance + """ + raise NotImplementedError('Provider implementations must override the static create method') + diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py new file mode 100644 index 0000000..dab3796 --- /dev/null +++ b/ldai/providers/ai_provider_factory.py @@ -0,0 +1,154 @@ +"""Factory for creating AIProvider instances based on the provider configuration.""" + +import importlib +from typing import Any, List, Literal, Optional, Type + +from ldai.models import AIConfigKind +from ldai.providers.ai_provider import AIProvider + + +# List of supported AI providers +SUPPORTED_AI_PROVIDERS = [ + # Multi-provider packages should be last in the list + 'langchain', +] + +# Type representing the supported AI providers +SupportedAIProvider = Literal['langchain'] + + +class AIProviderFactory: + """ + Factory for creating AIProvider instances based on the provider configuration. + """ + + @staticmethod + async def create( + ai_config: AIConfigKind, + logger: Optional[Any] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIProvider]: + """ + Create an AIProvider instance based on the AI configuration. + + This method attempts to load provider-specific implementations dynamically. + Returns None if the provider is not supported. 
+ + :param ai_config: The AI configuration + :param logger: Optional logger for logging provider initialization + :param default_ai_provider: Optional default AI provider to use + :return: AIProvider instance or None if not supported + """ + provider_name = ai_config.provider.name.lower() if ai_config.provider else None + # Determine which providers to try based on default_ai_provider + providers_to_try = AIProviderFactory._get_providers_to_try(default_ai_provider, provider_name) + + # Try each provider in order + for provider_type in providers_to_try: + provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) + if provider: + return provider + + # If no provider was successfully created, log a warning + if logger: + logger.warn( + f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" + ) + return None + + @staticmethod + def _get_providers_to_try( + default_ai_provider: Optional[SupportedAIProvider], + provider_name: Optional[str], + ) -> List[SupportedAIProvider]: + """ + Determine which providers to try based on default_ai_provider and provider_name. 
+ + :param default_ai_provider: Optional default provider to use + :param provider_name: Optional provider name from config + :return: List of providers to try in order + """ + # If default_ai_provider is set, only try that specific provider + if default_ai_provider: + return [default_ai_provider] + + # If no default_ai_provider is set, try all providers in order + provider_set = set() + + # First try the specific provider if it's supported + if provider_name and provider_name in SUPPORTED_AI_PROVIDERS: + provider_set.add(provider_name) # type: ignore + + # Then try multi-provider packages, but avoid duplicates + multi_provider_packages: List[SupportedAIProvider] = ['langchain', 'vercel'] + for provider in multi_provider_packages: + provider_set.add(provider) + + return list(provider_set) + + @staticmethod + async def _try_create_provider( + provider_type: SupportedAIProvider, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Try to create a provider of the specified type. + + :param provider_type: Type of provider to create + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + provider_mappings = { + 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + 'langchain': ('launchdarkly_server_sdk_ai_langchain', 'LangChainProvider'), + 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + } + + if provider_type not in provider_mappings: + return None + + package_name, provider_class_name = provider_mappings[provider_type] + return await AIProviderFactory._create_provider( + package_name, provider_class_name, ai_config, logger + ) + + @staticmethod + async def _create_provider( + package_name: str, + provider_class_name: str, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Create a provider instance dynamically. 
+ + :param package_name: Name of the package containing the provider + :param provider_class_name: Name of the provider class + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + try: + # Try to dynamically import the provider + # This will work if the package is installed + module = importlib.import_module(package_name) + provider_class: Type[AIProvider] = getattr(module, provider_class_name) + + provider = await provider_class.create(ai_config, logger) + if logger: + logger.debug( + f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}" + ) + return provider + except (ImportError, AttributeError, Exception) as error: + # If the provider is not available or creation fails, return None + if logger: + logger.warn( + f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}: {error}" + ) + return None + diff --git a/ldai/providers/types.py b/ldai/providers/types.py new file mode 100644 index 0000000..4bfd692 --- /dev/null +++ b/ldai/providers/types.py @@ -0,0 +1,37 @@ +"""Types for AI provider responses.""" + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from ldai.models import LDMessage +from ldai.tracker import TokenUsage + + +@dataclass +class LDAIMetrics: + """ + Metrics information for AI operations that includes success status and token usage. + """ + success: bool + usage: Optional[TokenUsage] = None + + +@dataclass +class ChatResponse: + """ + Chat response structure. + """ + message: LDMessage + metrics: LDAIMetrics + evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later + + +@dataclass +class StructuredResponse: + """ + Structured response from AI models. 
+ """ + data: Dict[str, Any] + raw_response: str + metrics: LDAIMetrics + From 6ee62b45e4e6559ba127f5ed5deae72a1d06eebe Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 22:51:41 +0000 Subject: [PATCH 06/37] create a langchain implementation of the ai provider --- ldai/providers/__init__.py | 20 +- ldai/providers/ai_provider_factory.py | 21 +- ldai/providers/langchain/__init__.py | 284 ++++++++++++++++++++++++++ 3 files changed, 316 insertions(+), 9 deletions(-) create mode 100644 ldai/providers/langchain/__init__.py diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 8cac547..1beffb4 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -3,9 +3,19 @@ from ldai.providers.ai_provider import AIProvider from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider -__all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'SupportedAIProvider', -] +# Export LangChain provider if available +try: + from ldai.providers.langchain import LangChainProvider + __all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'LangChainProvider', + 'SupportedAIProvider', + ] +except ImportError: + __all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', + ] diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index dab3796..41cc1c2 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -80,7 +80,7 @@ def _get_providers_to_try( provider_set.add(provider_name) # type: ignore # Then try multi-provider packages, but avoid duplicates - multi_provider_packages: List[SupportedAIProvider] = ['langchain', 'vercel'] + multi_provider_packages: List[SupportedAIProvider] = ['langchain'] for provider in multi_provider_packages: provider_set.add(provider) @@ -100,10 +100,23 @@ async def _try_create_provider( :param logger: Optional logger :return: AIProvider instance or None if creation failed """ + # Handle built-in providers 
(part of this package) + if provider_type == 'langchain': + try: + from ldai.providers.langchain import LangChainProvider + return await LangChainProvider.create(ai_config, logger) + except ImportError as error: + if logger: + logger.warn( + f"Error creating LangChainProvider: {error}. " + f"Make sure langchain and langchain-core packages are installed." + ) + return None + + # For future external providers, use dynamic import provider_mappings = { - 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), - 'langchain': ('launchdarkly_server_sdk_ai_langchain', 'LangChainProvider'), - 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), } if provider_type not in provider_mappings: diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py new file mode 100644 index 0000000..af84dc8 --- /dev/null +++ b/ldai/providers/langchain/__init__.py @@ -0,0 +1,284 @@ +"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" + +from typing import Any, Dict, List, Optional + +from langchain_core.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse +from ldai.tracker import TokenUsage + + +class LangChainProvider(AIProvider): + """ + LangChain implementation of AIProvider. + + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. + """ + + def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): + """ + Initialize the LangChain provider. 
+ + :param llm: LangChain BaseChatModel instance + :param logger: Optional logger for logging provider operations + """ + super().__init__(logger) + self._llm = llm + + # ============================================================================= + # MAIN FACTORY METHOD + # ============================================================================= + + @staticmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': + """ + Static factory method to create a LangChain AIProvider from an AI configuration. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured LangChainProvider instance + """ + llm = await LangChainProvider.create_langchain_model(ai_config) + return LangChainProvider(llm, logger) + + # ============================================================================= + # INSTANCE METHODS (AIProvider Implementation) + # ============================================================================= + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the LangChain model with an array of messages. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response + response: AIMessage = await self._llm.ainvoke(langchain_messages) + + # Generate metrics early (assumes success by default) + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + # Extract text content from the response + content: str = '' + if isinstance(response.content, str): + content = response.content + else: + # Log warning for non-string content (likely multimodal) + if self.logger: + self.logger.warn( + f"Multimodal response not supported, expecting a string. 
" + f"Content type: {type(response.content)}, Content: {response.content}" + ) + # Update metrics to reflect content loss + metrics.success = False + + # Create the assistant message + from ldai.models import LDMessage + assistant_message = LDMessage(role='assistant', content=content) + + return ChatResponse( + message=assistant_message, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain model invocation failed: {error}') + + from ldai.models import LDMessage + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the LangChain model with structured output support. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response with structured output + # Note: with_structured_output is available on BaseChatModel in newer LangChain versions + if hasattr(self._llm, 'with_structured_output'): + structured_llm = self._llm.with_structured_output(response_structure) + response = await structured_llm.ainvoke(langchain_messages) + else: + # Fallback: invoke normally and try to parse as JSON + response_obj = await self._llm.ainvoke(langchain_messages) + if isinstance(response_obj, AIMessage): + import json + try: + response = json.loads(response_obj.content) + except json.JSONDecodeError: + response = {'content': response_obj.content} + else: + response = response_obj + + # Using structured output doesn't support metrics + metrics = LDAIMetrics( + success=True, + 
usage=TokenUsage(total=0, input=0, output=0), + ) + + import json + return StructuredResponse( + data=response if isinstance(response, dict) else {'result': response}, + raw_response=json.dumps(response) if not isinstance(response, str) else response, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain structured model invocation failed: {error}') + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics( + success=False, + usage=TokenUsage(total=0, input=0, output=0), + ), + ) + + def get_chat_model(self) -> BaseChatModel: + """ + Get the underlying LangChain model instance. + + :return: The LangChain BaseChatModel instance + """ + return self._llm + + # ============================================================================= + # STATIC UTILITY METHODS + # ============================================================================= + + @staticmethod + def map_provider(ld_provider_name: str) -> str: + """ + Map LaunchDarkly provider names to LangChain provider names. + + This method enables seamless integration between LaunchDarkly's standardized + provider naming and LangChain's naming conventions. + + :param ld_provider_name: LaunchDarkly provider name + :return: LangChain provider name + """ + lowercased_name = ld_provider_name.lower() + + mapping: Dict[str, str] = { + 'gemini': 'google-genai', + } + + return mapping.get(lowercased_name, lowercased_name) + + @staticmethod + def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: + """ + Get AI metrics from a LangChain provider response. + + This method extracts token usage information and success status from LangChain responses + and returns a LaunchDarkly LDAIMetrics object. 
+ + :param response: The response from the LangChain model + :return: LDAIMetrics with success status and token usage + """ + # Extract token usage if available + usage: Optional[TokenUsage] = None + if hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get('token_usage') + if token_usage: + usage = TokenUsage( + total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, + input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, + output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, + ) + + # LangChain responses that complete successfully are considered successful by default + return LDAIMetrics(success=True, usage=usage) + + @staticmethod + def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: + """ + Convert LaunchDarkly messages to LangChain messages. + + This helper method enables developers to work directly with LangChain message types + while maintaining compatibility with LaunchDarkly's standardized message format. + + :param messages: List of LDMessage objects + :return: List of LangChain message objects + """ + result: List[BaseMessage] = [] + for msg in messages: + if msg.role == 'system': + result.append(SystemMessage(content=msg.content)) + elif msg.role == 'user': + result.append(HumanMessage(content=msg.content)) + elif msg.role == 'assistant': + result.append(AIMessage(content=msg.content)) + else: + raise ValueError(f'Unsupported message role: {msg.role}') + return result + + @staticmethod + async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: + """ + Create a LangChain model from an AI configuration. + + This public helper method enables developers to initialize their own LangChain models + using LaunchDarkly AI configurations. 
+ + :param ai_config: The LaunchDarkly AI configuration + :return: A configured LangChain BaseChatModel + """ + model_name = ai_config.model.name if ai_config.model else '' + provider = ai_config.provider.name if ai_config.provider else '' + parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} + if not isinstance(parameters, dict): + parameters = {} + + # Use LangChain's init_chat_model to support multiple providers + # Note: This requires langchain package to be installed + try: + # Try to import init_chat_model from langchain.chat_models + # This is available in langchain >= 0.1.0 + try: + from langchain.chat_models import init_chat_model + except ImportError: + # Fallback for older versions or different import path + from langchain.chat_models.universal import init_chat_model + + # Map provider name + langchain_provider = LangChainProvider.map_provider(provider) + + # Create model configuration + model_kwargs = {**parameters} + if langchain_provider: + model_kwargs['model_provider'] = langchain_provider + + # Initialize the chat model (init_chat_model may be async or sync) + result = init_chat_model(model_name, **model_kwargs) + # Handle both sync and async initialization + if hasattr(result, '__await__'): + return await result + return result + except ImportError as e: + raise ImportError( + 'langchain package is required for LangChainProvider. 
' + 'Install it with: pip install langchain langchain-core' + ) from e + From 231ae2e226766cb4ec2d11d2d0e69f34a792718c Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 01:15:01 +0000 Subject: [PATCH 07/37] Add Judge and evaluation metric tracking --- ldai/__init__.py | 11 +- ldai/client.py | 78 +++++++- ldai/judge/__init__.py | 231 ++++++++++++++++++++++++ ldai/judge/evaluation_schema_builder.py | 73 ++++++++ ldai/models.py | 38 ++-- ldai/providers/types.py | 19 ++ ldai/tracker.py | 101 ++++++++++- 7 files changed, 523 insertions(+), 28 deletions(-) create mode 100644 ldai/judge/__init__.py create mode 100644 ldai/judge/evaluation_schema_builder.py diff --git a/ldai/__init__.py b/ldai/__init__.py index 222c007..bba0bb1 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -13,7 +13,6 @@ AICompletionConfigDefault, AIJudgeConfig, AIJudgeConfigDefault, - Judge, JudgeConfiguration, LDMessage, ModelConfig, @@ -25,6 +24,12 @@ LDAIAgentDefaults, ) +# Export judge +from ldai.judge import AIJudge + +# Export judge types +from ldai.providers.types import EvalScore, JudgeResponse + __all__ = [ 'LDAIClient', 'AIAgentConfig', @@ -35,8 +40,10 @@ 'AICompletionConfigDefault', 'AIJudgeConfig', 'AIJudgeConfigDefault', - 'Judge', + 'AIJudge', + 'EvalScore', 'JudgeConfiguration', + 'JudgeResponse', 'LDMessage', 'ModelConfig', 'ProviderConfig', diff --git a/ldai/client.py b/ldai/client.py index 4f3cc9e..248fcb6 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -4,6 +4,7 @@ from ldclient import Context from ldclient.client import LDClient +from ldai.judge import AIJudge from ldai.models import ( AIAgentConfig, AIAgentConfigDefault, @@ -13,12 +14,12 @@ AICompletionConfigDefault, AIJudgeConfig, AIJudgeConfigDefault, - Judge, JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, ) +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider from ldai.tracker import LDAIConfigTracker @@ -118,6 +119,79 @@ def judge_config( return config + 
async def create_judge( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIJudge]: + """ + Creates and returns a new Judge instance for AI evaluation. + + :param key: The key identifying the AI judge configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation. + The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. + :param default_ai_provider: Optional default AI provider to use. + :return: Judge instance or None if disabled/unsupported + + Example:: + + judge = client.create_judge( + "relevance-judge", + context, + AIJudgeConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + evaluation_metric_keys=['$ld:ai:judge:relevance'], + messages=[LDMessage(role='system', content='You are a relevance judge.')] + ), + variables={'metric': "relevance"} + ) + + if judge: + result = await judge.evaluate("User question", "AI response") + if result and result.evals: + relevance_eval = result.evals.get('$ld:ai:judge:relevance') + if relevance_eval: + print('Relevance score:', relevance_eval.score) + """ + self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) + + try: + # Warn if reserved variables are provided + if variables: + if 'message_history' in variables: + # Note: Python doesn't have a logger on the client, but we could add one + pass # Would log warning if logger available + if 'response_to_evaluate' in variables: + pass # Would log warning if logger available + + # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + extended_variables = dict(variables) if variables else {} + 
extended_variables['message_history'] = '{{message_history}}' + extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' + + judge_config = self.judge_config(key, context, default_value, extended_variables) + + if not judge_config.enabled or not judge_config.tracker: + # Would log info if logger available + return None + + # Create AI provider for the judge + provider = await AIProviderFactory.create(judge_config, None, default_ai_provider) + if not provider: + return None + + return AIJudge(judge_config, judge_config.tracker, provider, None) + except Exception as error: + # Would log error if logger available + return None + def agent_config( self, key: str, @@ -337,7 +411,7 @@ def __evaluate( judge_config = variation['judgeConfiguration'] if 'judges' in judge_config and isinstance(judge_config['judges'], list): judges = [ - Judge( + JudgeConfiguration.Judge( key=judge['key'], sampling_rate=judge['samplingRate'] ) diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py new file mode 100644 index 0000000..323cd19 --- /dev/null +++ b/ldai/judge/__init__.py @@ -0,0 +1,231 @@ +"""Judge implementation for AI evaluation.""" + +import random +from typing import Any, Dict, Optional + +import chevron + +from ldai.models import AIJudgeConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.tracker import LDAIConfigTracker +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder + + +class AIJudge: + """ + Judge implementation that handles evaluation functionality and conversation management. + + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate + other AI Configs using structured output. + """ + + def __init__( + self, + ai_config: AIJudgeConfig, + ai_config_tracker: LDAIConfigTracker, + ai_provider: AIProvider, + logger: Optional[Any] = None, + ): + """ + Initialize the Judge. 
+ + :param ai_config: The judge AI configuration + :param ai_config_tracker: The tracker for the judge configuration + :param ai_provider: The AI provider to use for evaluation + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._ai_config_tracker = ai_config_tracker + self._ai_provider = ai_provider + self._logger = logger + self._evaluation_response_structure = EvaluationSchemaBuilder.build( + ai_config.evaluation_metric_keys + ) + + async def evaluate( + self, + input_text: str, + output_text: str, + sampling_rate: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response using the judge's configuration. + + :param input_text: The input prompt or question that was provided to the AI + :param output_text: The AI-generated response to be evaluated + :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + try: + if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: + if self._logger: + self._logger.warn( + 'Judge configuration is missing required evaluationMetricKeys' + ) + return None + + if not self._ai_config.messages: + if self._logger: + self._logger.warn('Judge configuration must include messages') + return None + + if random.random() > sampling_rate: + if self._logger: + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + return None + + messages = self._construct_evaluation_messages(input_text, output_text) + + # Track metrics of the structured model invocation + response = await self._ai_config_tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) + ) + + success = response.metrics.success + + evals = self._parse_evaluation_response(response.data) + + if len(evals) != len(self._ai_config.evaluation_metric_keys): 
+ if self._logger: + self._logger.warn('Judge evaluation did not return all evaluations') + success = False + + return JudgeResponse( + evals=evals, + success=success, + ) + except Exception as error: + if self._logger: + self._logger.error(f'Judge evaluation failed: {error}') + return JudgeResponse( + evals={}, + success=False, + error=str(error) if isinstance(error, Exception) else 'Unknown error', + ) + + async def evaluate_messages( + self, + messages: list[LDMessage], + response: ChatResponse, + sampling_ratio: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response from chat messages and response. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' + output_text = response.message.content + + return await self.evaluate(input_text, output_text, sampling_ratio) + + def get_ai_config(self) -> AIJudgeConfig: + """ + Returns the AI Config used by this judge. + + :return: The judge AI configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Returns the tracker associated with this judge. + + :return: The tracker for the judge configuration + """ + return self._ai_config_tracker + + def get_provider(self) -> AIProvider: + """ + Returns the AI provider used by this judge. + + :return: The AI provider + """ + return self._ai_provider + + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + """ + Constructs evaluation messages by combining judge's config messages with input/output. 
+ + :param input_text: The input text + :param output_text: The output text to evaluate + :return: List of messages for evaluation + """ + if not self._ai_config.messages: + return [] + + messages: list[LDMessage] = [] + for msg in self._ai_config.messages: + # Interpolate message content with reserved variables + content = self._interpolate_message(msg.content, { + 'message_history': input_text, + 'response_to_evaluate': output_text, + }) + messages.append(LDMessage(role=msg.role, content=content)) + + return messages + + def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: + """ + Interpolates message content with variables using Mustache templating. + + :param content: The message content template + :param variables: Variables to interpolate + :return: Interpolated message content + """ + # Use chevron (Mustache) for templating, with no escaping + return chevron.render(content, variables) + + def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: + """ + Parses the structured evaluation response from the AI provider. + + :param data: The structured response data + :return: Dictionary of evaluation scores keyed by metric key + """ + results: Dict[str, EvalScore] = {} + + if not data.get('evaluations') or not isinstance(data['evaluations'], dict): + if self._logger: + self._logger.warn('Invalid response: missing or invalid evaluations object') + return results + + evaluations = data['evaluations'] + + for metric_key in self._ai_config.evaluation_metric_keys: + evaluation = evaluations.get(metric_key) + + if not evaluation or not isinstance(evaluation, dict): + if self._logger: + self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + continue + + score = evaluation.get('score') + reasoning = evaluation.get('reasoning') + + if not isinstance(score, (int, float)) or score < 0 or score > 1: + if self._logger: + self._logger.warn( + f'Invalid score evaluated for {metric_key}: {score}. 
' + 'Score must be a number between 0 and 1 inclusive' + ) + continue + + if not isinstance(reasoning, str): + if self._logger: + self._logger.warn( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. ' + 'Reasoning must be a string' + ) + continue + + results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) + + return results + + diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py new file mode 100644 index 0000000..d8d8fa4 --- /dev/null +++ b/ldai/judge/evaluation_schema_builder.py @@ -0,0 +1,73 @@ +"""Internal class for building dynamic evaluation response schemas.""" + +from typing import Any, Dict + + +class EvaluationSchemaBuilder: + """ + Internal class for building dynamic evaluation response schemas. + Not exported - only used internally by Judge. + """ + + @staticmethod + def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build an evaluation response schema from evaluation metric keys. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Schema dictionary for structured output + """ + return { + 'type': 'object', + 'properties': { + 'evaluations': { + 'type': 'object', + 'description': f"Object containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'properties': EvaluationSchemaBuilder._build_key_properties(evaluation_metric_keys), + 'required': evaluation_metric_keys, + 'additionalProperties': False, + }, + }, + 'required': ['evaluations'], + 'additionalProperties': False, + } + + @staticmethod + def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build properties for each evaluation metric key. 
+ + :param evaluation_metric_keys: List of evaluation metric keys + :return: Dictionary of properties for each key + """ + result: Dict[str, Any] = {} + for key in evaluation_metric_keys: + result[key] = EvaluationSchemaBuilder._build_key_schema(key) + return result + + @staticmethod + def _build_key_schema(key: str) -> Dict[str, Any]: + """ + Build schema for a single evaluation metric key. + + :param key: Evaluation metric key + :return: Schema dictionary for the key + """ + return { + 'type': 'object', + 'properties': { + 'score': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': f'Score between 0.0 and 1.0 for {key}', + }, + 'reasoning': { + 'type': 'string', + 'description': f'Reasoning behind the score for {key}', + }, + }, + 'required': ['score', 'reasoning'], + 'additionalProperties': False, + } + diff --git a/ldai/models.py b/ldai/models.py index 0b961f7..4531f8f 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -105,30 +105,30 @@ def to_dict(self) -> dict: # Judge Types # ============================================================================ -@dataclass(frozen=True) -class Judge: - """ - Configuration for a single judge attachment. - """ - key: str - sampling_rate: float - - def to_dict(self) -> dict: - """ - Render the judge as a dictionary object. - """ - return { - 'key': self.key, - 'samplingRate': self.sampling_rate, - } - - @dataclass(frozen=True) class JudgeConfiguration: """ Configuration for judge attachment to AI Configs. """ - judges: List[Judge] + + @dataclass(frozen=True) + class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. 
+ """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + judges: List['JudgeConfiguration.Judge'] def to_dict(self) -> dict: """ diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 4bfd692..58ca3fc 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -35,3 +35,22 @@ class StructuredResponse: raw_response: str metrics: LDAIMetrics + +@dataclass +class EvalScore: + """ + Score and reasoning for a single evaluation metric. + """ + score: float # Score between 0.0 and 1.0 + reasoning: str # Reasoning behind the provided score + + +@dataclass +class JudgeResponse: + """ + Response from a judge evaluation containing scores and reasoning for multiple metrics. + """ + evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning + success: bool # Whether the evaluation completed successfully + error: Optional[str] = None # Error message if evaluation failed + diff --git a/ldai/tracker.py b/ldai/tracker.py index a049952..632f0f4 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -1,7 +1,7 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional +from typing import Any, Dict, Optional from ldclient import Context, LDClient @@ -144,7 +144,7 @@ def track_duration_of(self, func): An exception occurring during the execution of the function will still track the duration. The exception will be re-thrown. - :param func: Function to track. + :param func: Function to track (synchronous only). :return: Result of the tracked function. """ start_time = time.time() @@ -157,6 +157,90 @@ def track_duration_of(self, func): return result + async def track_metrics_of(self, metrics_extractor, func): + """ + Track metrics for a generic AI operation. + + This function will track the duration of the operation, extract metrics using the provided + metrics extractor function, and track success or error status accordingly. 
+ + If the provided function throws, then this method will also throw. + In the case the provided function throws, this function will record the duration and an error. + A failed operation will not have any token usage data. + + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result + :param func: Async function which executes the operation + :return: The result of the operation + """ + start_time = time.time() + result = None + try: + result = await func() + except Exception as err: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + self.track_error() + raise err + + # Track duration after successful call + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + + # Extract metrics after successful AI call + from ldai.providers.types import LDAIMetrics + metrics = metrics_extractor(result) + + # Track success/error based on metrics + if metrics.success: + self.track_success() + else: + self.track_error() + + # Track token usage if available + if metrics.usage: + self.track_tokens(metrics.usage) + + return result + + def track_eval_scores(self, scores: Dict[str, Any]) -> None: + """ + Track evaluation scores for multiple metrics. + + :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) + """ + from ldai.providers.types import EvalScore + + # Track each evaluation score individually + for metric_key, eval_score in scores.items(): + if isinstance(eval_score, EvalScore): + self._ld_client.track( + metric_key, + self._context, + self.__get_track_data(), + eval_score.score + ) + + def track_judge_response(self, judge_response: Any) -> None: + """ + Track a judge response, including evaluation scores and success status. 
+ + :param judge_response: JudgeResponse object containing evals and success status + """ + from ldai.providers.types import JudgeResponse + + if isinstance(judge_response, JudgeResponse): + # Track evaluation scores + if judge_response.evals: + self.track_eval_scores(judge_response.evals) + + # Track success/error based on judge response + if judge_response.success: + self.track_success() + else: + self.track_error() + def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: """ Track user feedback for an AI operation. @@ -197,7 +281,7 @@ def track_error(self) -> None: "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 ) - def track_openai_metrics(self, func): + async def track_openai_metrics(self, func): """ Track OpenAI-specific operations. @@ -211,15 +295,22 @@ def track_openai_metrics(self, func): A failed operation will not have any token usage data. - :param func: Function to track. + :param func: Async function to track. :return: Result of the tracked function. 
""" + start_time = time.time() try: - result = self.track_duration_of(func) + result = await func() + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_success() if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) except Exception: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_error() raise From 445ab8c9c3488221ea39dfeec94cdd5235d8581e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 03:43:01 +0000 Subject: [PATCH 08/37] Add Chat implementation --- ldai/__init__.py | 4 + ldai/chat/__init__.py | 191 ++++++++++++++++++++++++++++++++++++++++++ ldai/client.py | 109 ++++++++++++++++++++++++ 3 files changed, 304 insertions(+) create mode 100644 ldai/chat/__init__.py diff --git a/ldai/__init__.py b/ldai/__init__.py index bba0bb1..617ac3a 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -27,6 +27,9 @@ # Export judge from ldai.judge import AIJudge +# Export chat +from ldai.chat import TrackedChat + # Export judge types from ldai.providers.types import EvalScore, JudgeResponse @@ -41,6 +44,7 @@ 'AIJudgeConfig', 'AIJudgeConfigDefault', 'AIJudge', + 'TrackedChat', 'EvalScore', 'JudgeConfiguration', 'JudgeResponse', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py new file mode 100644 index 0000000..5bdf37a --- /dev/null +++ b/ldai/chat/__init__.py @@ -0,0 +1,191 @@ +"""TrackedChat implementation for managing AI chat conversations.""" + +from typing import Any, Dict, List, Optional + +from ldai.models import AICompletionConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, JudgeResponse +from ldai.judge import AIJudge +from ldai.tracker import LDAIConfigTracker + + +class TrackedChat: + """ + Concrete implementation of TrackedChat that provides chat functionality + by 
delegating to an AIProvider implementation. + + This class handles conversation management and tracking, while delegating + the actual model invocation to the provider. + """ + + def __init__( + self, + ai_config: AICompletionConfig, + tracker: LDAIConfigTracker, + provider: AIProvider, + judges: Optional[Dict[str, AIJudge]] = None, + logger: Optional[Any] = None, + ): + """ + Initialize the TrackedChat. + + :param ai_config: The completion AI configuration + :param tracker: The tracker for the completion configuration + :param provider: The AI provider to use for chat + :param judges: Optional dictionary of judge instances keyed by their configuration keys + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._tracker = tracker + self._provider = provider + self._judges = judges or {} + self._logger = logger + self._messages: List[LDMessage] = [] + + async def invoke(self, prompt: str) -> ChatResponse: + """ + Invoke the chat model with a prompt string. + + This method handles conversation management and tracking, delegating to the provider's invoke_model method. 
+ + :param prompt: The user prompt to send to the chat model + :return: ChatResponse containing the model's response and metrics + """ + # Convert prompt string to LDMessage with role 'user' and add to conversation history + user_message: LDMessage = LDMessage(role='user', content=prompt) + self._messages.append(user_message) + + # Prepend config messages to conversation history for model invocation + config_messages = self._ai_config.messages or [] + all_messages = config_messages + self._messages + + # Delegate to provider-specific implementation with tracking + response = await self._tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._provider.invoke_model(all_messages), + ) + + # Evaluate with judges if configured + if ( + self._ai_config.judge_configuration + and self._ai_config.judge_configuration.judges + and len(self._ai_config.judge_configuration.judges) > 0 + ): + evaluations = await self._evaluate_with_judges(self._messages, response) + response.evaluations = evaluations + + # Add the response message to conversation history + self._messages.append(response.message) + return response + + async def _evaluate_with_judges( + self, + messages: List[LDMessage], + response: ChatResponse, + ) -> List[Optional[JudgeResponse]]: + """ + Evaluates the response with all configured judges. + + Returns a list of evaluation results. 
+ + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :return: List of judge evaluation results (may contain None for failed evaluations) + """ + if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: + return [] + + judge_configs = self._ai_config.judge_configuration.judges + + # Start all judge evaluations in parallel + async def evaluate_judge(judge_config): + judge = self._judges.get(judge_config.key) + if not judge: + if self._logger: + self._logger.warn( + f"Judge configuration is not enabled: {judge_config.key}", + ) + return None + + eval_result = await judge.evaluate_messages( + messages, response, judge_config.sampling_rate + ) + + if eval_result and eval_result.success: + self._tracker.track_eval_scores(eval_result.evals) + + return eval_result + + # Ensure all evaluations complete even if some fail + import asyncio + evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs] + results = await asyncio.gather(*evaluation_promises, return_exceptions=True) + + # Map exceptions to None + return [ + None if isinstance(result, Exception) else result + for result in results + ] + + def get_config(self) -> AICompletionConfig: + """ + Get the underlying AI configuration used to initialize this TrackedChat. + + :return: The AI completion configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Get the underlying AI configuration tracker used to initialize this TrackedChat. + + :return: The tracker instance + """ + return self._tracker + + def get_provider(self) -> AIProvider: + """ + Get the underlying AI provider instance. + + This provides direct access to the provider for advanced use cases. + + :return: The AI provider instance + """ + return self._provider + + def get_judges(self) -> Dict[str, AIJudge]: + """ + Get the judges associated with this TrackedChat. 
+ + Returns a dictionary of judge instances keyed by their configuration keys. + + :return: Dictionary of judge instances + """ + return self._judges + + def append_messages(self, messages: List[LDMessage]) -> None: + """ + Append messages to the conversation history. + + Adds messages to the conversation history without invoking the model, + which is useful for managing multi-turn conversations or injecting context. + + :param messages: Array of messages to append to the conversation history + """ + self._messages.extend(messages) + + def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: + """ + Get all messages in the conversation history. + + :param include_config_messages: Whether to include the config messages from the AIConfig. + Defaults to False. + :return: Array of messages. When include_config_messages is True, returns both config + messages and conversation history with config messages prepended. When False, + returns only the conversation history messages. + """ + if include_config_messages: + config_messages = self._ai_config.messages or [] + return config_messages + self._messages + return list(self._messages) + diff --git a/ldai/client.py b/ldai/client.py index 248fcb6..91649d7 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -4,6 +4,7 @@ from ldclient import Context from ldclient.client import LDClient +from ldai.chat import TrackedChat from ldai.judge import AIJudge from ldai.models import ( AIAgentConfig, @@ -192,6 +193,114 @@ async def create_judge( # Would log error if logger available return None + async def _initialize_judges( + self, + judge_configs: List[JudgeConfiguration.Judge], + context: Context, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Dict[str, AIJudge]: + """ + Initialize judges from judge configurations. 
+ + :param judge_configs: List of judge configurations + :param context: Standard Context used when evaluating flags + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: Dictionary of judge instances keyed by their configuration keys + """ + judges: Dict[str, AIJudge] = {} + + async def create_judge_for_config(judge_key: str): + judge = await self.create_judge( + judge_key, + context, + AIJudgeConfigDefault(enabled=False), + variables, + default_ai_provider, + ) + return judge_key, judge + + judge_promises = [ + create_judge_for_config(judge_config.key) + for judge_config in judge_configs + ] + + import asyncio + results = await asyncio.gather(*judge_promises, return_exceptions=True) + + for result in results: + if isinstance(result, Exception): + continue + judge_key, judge = result + if judge: + judges[judge_key] = judge + + return judges + + async def create_chat( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[TrackedChat]: + """ + Creates and returns a new TrackedChat instance for AI chat conversations. 
+
+        :param key: The key identifying the AI completion configuration to use
+        :param context: Standard Context used when evaluating flags
+        :param default_value: A default value representing a standard AI config result
+        :param variables: Dictionary of values for instruction interpolation
+        :param default_ai_provider: Optional default AI provider to use
+        :return: TrackedChat instance or None if disabled/unsupported
+
+        Example::
+
+            chat = await client.create_chat(
+                "customer-support-chat",
+                context,
+                AICompletionConfigDefault(
+                    enabled=True,
+                    model=ModelConfig("gpt-4"),
+                    provider=ProviderConfig("openai"),
+                    messages=[LDMessage(role='system', content='You are a helpful assistant.')]
+                ),
+                variables={'customerName': 'John'}
+            )
+
+            if chat:
+                response = await chat.invoke("I need help with my order")
+                print(response.message.content)
+
+                # Access conversation history
+                messages = chat.get_messages()
+                print(f"Conversation has {len(messages)} messages")
+        """
+        self._client.track('$ld:ai:config:function:createChat', context, key, 1)
+
+        config = self.completion_config(key, context, default_value, variables)
+
+        if not config.enabled or not config.tracker:
+            # Would log info if logger available
+            return None
+
+        provider = await AIProviderFactory.create(config, None, default_ai_provider)
+        if not provider:
+            return None
+
+        judges = {}
+        if config.judge_configuration and config.judge_configuration.judges:
+            judges = await self._initialize_judges(
+                config.judge_configuration.judges,
+                context,
+                variables,
+                default_ai_provider,
+            )
+
+        return TrackedChat(config, config.tracker, provider, judges, None)
+
     def agent_config(
         self,
         key: str,

From 5446222a18f7f723cad0fad6b3a0386144dd23ad Mon Sep 17 00:00:00 2001
From: jsonbailey
Date: Sat, 8 Nov 2025 03:53:43 +0000
Subject: [PATCH 09/37] Set a default for evaluation metrics

---
 ldai/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ldai/models.py b/ldai/models.py
index 4531f8f..fa36f8c 
100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,5 +1,5 @@ import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Optional, Union from ldai.tracker import LDAIConfigTracker @@ -302,7 +302,7 @@ class AIJudgeConfig(AIConfig): """ Judge-specific AI Config with required evaluation metric key. """ - evaluation_metric_keys: List[str] + evaluation_metric_keys: List[str] = field(default_factory=list) messages: Optional[List[LDMessage]] = None def to_dict(self) -> dict: From bc46608ad4bb66125412ffb23e9e8963804e1aac Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 17:24:24 +0000 Subject: [PATCH 10/37] add the logger --- ldai/client.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ldai/client.py b/ldai/client.py index 91649d7..4f4b6b9 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional, Tuple import chevron @@ -29,6 +30,7 @@ class LDAIClient: def __init__(self, client: LDClient): self._client = client + self._logger = logging.getLogger('ldclient.ai') def completion_config( self, @@ -184,11 +186,11 @@ async def create_judge( return None # Create AI provider for the judge - provider = await AIProviderFactory.create(judge_config, None, default_ai_provider) + provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) if not provider: return None - return AIJudge(judge_config, judge_config.tracker, provider, None) + return AIJudge(judge_config, judge_config.tracker, provider, self._logger) except Exception as error: # Would log error if logger available return None @@ -279,14 +281,15 @@ async def create_chat( print(f"Conversation has {len(messages)} messages") """ self._client.track('$ld:ai:config:function:createChat', context, key, 1) - + if self._logger: + self._logger.debug(f"Creating chat for key: {key}") config = 
self.completion_config(key, context, default_value, variables) if not config.enabled or not config.tracker: # Would log info if logger available return None - provider = await AIProviderFactory.create(config, None, default_ai_provider) + provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) if not provider: return None @@ -299,7 +302,7 @@ async def create_chat( default_ai_provider, ) - return TrackedChat(config, config.tracker, provider, judges, None) + return TrackedChat(config, config.tracker, provider, judges, self._logger) def agent_config( self, From fd0aff476dbe6328906be71a734ac9c216d899b2 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:09:59 +0000 Subject: [PATCH 11/37] adjust langchain import --- ldai/providers/langchain/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py index af84dc8..f2e2c35 100644 --- a/ldai/providers/langchain/__init__.py +++ b/ldai/providers/langchain/__init__.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional -from langchain_core.chat_models import BaseChatModel +from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage from ldai.models import AIConfigKind, LDMessage From c3c939f1b70654596d3b0cff02b28d4c98859515 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:35:13 +0000 Subject: [PATCH 12/37] fix structure response --- ldai/judge/evaluation_schema_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py index d8d8fa4..1965e64 100644 --- a/ldai/judge/evaluation_schema_builder.py +++ b/ldai/judge/evaluation_schema_builder.py @@ -18,6 +18,8 @@ def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: :return: Schema dictionary for structured output """ return { + 
'title': 'EvaluationResponse', + 'description': f"Response containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", 'type': 'object', 'properties': { 'evaluations': { From 125bb66252083ca983a9b46a209425f77ae9923c Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:59:13 +0000 Subject: [PATCH 13/37] judge respose should be async --- ldai/chat/__init__.py | 34 ++++++++++++++++------------------ ldai/providers/types.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 5bdf37a..0785c16 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -1,5 +1,6 @@ """TrackedChat implementation for managing AI chat conversations.""" +import asyncio from typing import Any, Dict, List, Optional from ldai.models import AICompletionConfig, LDMessage @@ -65,39 +66,39 @@ async def invoke(self, prompt: str) -> ChatResponse: lambda: self._provider.invoke_model(all_messages), ) - # Evaluate with judges if configured + # Start judge evaluations as async tasks (don't await them) if ( self._ai_config.judge_configuration and self._ai_config.judge_configuration.judges and len(self._ai_config.judge_configuration.judges) > 0 ): - evaluations = await self._evaluate_with_judges(self._messages, response) - response.evaluations = evaluations + evaluation_tasks = self._start_judge_evaluations(self._messages, response) + response.evaluations = evaluation_tasks # Add the response message to conversation history self._messages.append(response.message) return response - async def _evaluate_with_judges( + def _start_judge_evaluations( self, messages: List[LDMessage], response: ChatResponse, - ) -> List[Optional[JudgeResponse]]: + ) -> List[asyncio.Task[Optional[JudgeResponse]]]: """ - Evaluates the response with all configured judges. + Start judge evaluations as async tasks without awaiting them. - Returns a list of evaluation results. 
+ Returns a list of async tasks that can be awaited later. :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated - :return: List of judge evaluation results (may contain None for failed evaluations) + :return: List of async tasks that will return judge evaluation results """ if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: return [] judge_configs = self._ai_config.judge_configuration.judges - # Start all judge evaluations in parallel + # Start all judge evaluations as tasks async def evaluate_judge(judge_config): judge = self._judges.get(judge_config.key) if not judge: @@ -116,16 +117,13 @@ async def evaluate_judge(judge_config): return eval_result - # Ensure all evaluations complete even if some fail - import asyncio - evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs] - results = await asyncio.gather(*evaluation_promises, return_exceptions=True) - - # Map exceptions to None - return [ - None if isinstance(result, Exception) else result - for result in results + # Create tasks for each judge evaluation + tasks = [ + asyncio.create_task(evaluate_judge(judge_config)) + for judge_config in judge_configs ] + + return tasks def get_config(self) -> AICompletionConfig: """ diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 58ca3fc..45df755 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -15,6 +15,21 @@ class LDAIMetrics: success: bool usage: Optional[TokenUsage] = None + def to_dict(self) -> Dict[str, Any]: + """ + Render the metrics as a dictionary object. 
+ """ + result: Dict[str, Any] = { + 'success': self.success, + } + if self.usage is not None: + result['usage'] = { + 'total': self.usage.total, + 'input': self.usage.input, + 'output': self.usage.output, + } + return result + @dataclass class ChatResponse: @@ -44,6 +59,15 @@ class EvalScore: score: float # Score between 0.0 and 1.0 reasoning: str # Reasoning behind the provided score + def to_dict(self) -> Dict[str, Any]: + """ + Render the evaluation score as a dictionary object. + """ + return { + 'score': self.score, + 'reasoning': self.reasoning, + } + @dataclass class JudgeResponse: @@ -54,3 +78,15 @@ class JudgeResponse: success: bool # Whether the evaluation completed successfully error: Optional[str] = None # Error message if evaluation failed + def to_dict(self) -> Dict[str, Any]: + """ + Render the judge response as a dictionary object. + """ + result: Dict[str, Any] = { + 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, + 'success': self.success, + } + if self.error is not None: + result['error'] = self.error + return result + From b33ef0e24449be3dbe274c4346bae84de711e36d Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Sun, 16 Nov 2025 23:43:32 +0100 Subject: [PATCH 14/37] move import to top of file --- ldai/providers/ai_provider.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py index 5863a74..daf56c6 100644 --- a/ldai/providers/ai_provider.py +++ b/ldai/providers/ai_provider.py @@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Union from ldai.models import AIConfigKind, LDMessage -from ldai.providers.types import ChatResponse, StructuredResponse +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse class AIProvider(ABC): @@ -42,9 +42,6 @@ async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: if self.logger: self.logger.warn('invokeModel not implemented by this provider') - 
from ldai.models import LDMessage - from ldai.providers.types import LDAIMetrics - return ChatResponse( message=LDMessage(role='assistant', content=''), metrics=LDAIMetrics(success=False, usage=None), @@ -71,8 +68,6 @@ async def invoke_structured_model( if self.logger: self.logger.warn('invokeStructuredModel not implemented by this provider') - from ldai.providers.types import LDAIMetrics - return StructuredResponse( data={}, raw_response='', From 87b4bd6ff0482a2304b804b00667f7df7e07dd7c Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 18 Nov 2025 16:00:52 +0100 Subject: [PATCH 15/37] Refactor chat module: move TrackedChat implementation to a new file and update imports for clarity --- ldai/chat/__init__.py | 189 +------------------------------ ldai/chat/tracked_chat.py | 186 ++++++++++++++++++++++++++++++ ldai/judge/__init__.py | 230 +------------------------------------- ldai/judge/ai_judge.py | 230 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 422 insertions(+), 413 deletions(-) create mode 100644 ldai/chat/tracked_chat.py create mode 100644 ldai/judge/ai_judge.py diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 0785c16..ffef9ab 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -1,189 +1,6 @@ -"""TrackedChat implementation for managing AI chat conversations.""" +"""Chat module for LaunchDarkly AI SDK.""" -import asyncio -from typing import Any, Dict, List, Optional +from ldai.chat.tracked_chat import TrackedChat -from ldai.models import AICompletionConfig, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, JudgeResponse -from ldai.judge import AIJudge -from ldai.tracker import LDAIConfigTracker - - -class TrackedChat: - """ - Concrete implementation of TrackedChat that provides chat functionality - by delegating to an AIProvider implementation. 
- - This class handles conversation management and tracking, while delegating - the actual model invocation to the provider. - """ - - def __init__( - self, - ai_config: AICompletionConfig, - tracker: LDAIConfigTracker, - provider: AIProvider, - judges: Optional[Dict[str, AIJudge]] = None, - logger: Optional[Any] = None, - ): - """ - Initialize the TrackedChat. - - :param ai_config: The completion AI configuration - :param tracker: The tracker for the completion configuration - :param provider: The AI provider to use for chat - :param judges: Optional dictionary of judge instances keyed by their configuration keys - :param logger: Optional logger for logging - """ - self._ai_config = ai_config - self._tracker = tracker - self._provider = provider - self._judges = judges or {} - self._logger = logger - self._messages: List[LDMessage] = [] - - async def invoke(self, prompt: str) -> ChatResponse: - """ - Invoke the chat model with a prompt string. - - This method handles conversation management and tracking, delegating to the provider's invoke_model method. 
- - :param prompt: The user prompt to send to the chat model - :return: ChatResponse containing the model's response and metrics - """ - # Convert prompt string to LDMessage with role 'user' and add to conversation history - user_message: LDMessage = LDMessage(role='user', content=prompt) - self._messages.append(user_message) - - # Prepend config messages to conversation history for model invocation - config_messages = self._ai_config.messages or [] - all_messages = config_messages + self._messages - - # Delegate to provider-specific implementation with tracking - response = await self._tracker.track_metrics_of( - lambda result: result.metrics, - lambda: self._provider.invoke_model(all_messages), - ) - - # Start judge evaluations as async tasks (don't await them) - if ( - self._ai_config.judge_configuration - and self._ai_config.judge_configuration.judges - and len(self._ai_config.judge_configuration.judges) > 0 - ): - evaluation_tasks = self._start_judge_evaluations(self._messages, response) - response.evaluations = evaluation_tasks - - # Add the response message to conversation history - self._messages.append(response.message) - return response - - def _start_judge_evaluations( - self, - messages: List[LDMessage], - response: ChatResponse, - ) -> List[asyncio.Task[Optional[JudgeResponse]]]: - """ - Start judge evaluations as async tasks without awaiting them. - - Returns a list of async tasks that can be awaited later. 
- - :param messages: Array of messages representing the conversation history - :param response: The AI response to be evaluated - :return: List of async tasks that will return judge evaluation results - """ - if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: - return [] - - judge_configs = self._ai_config.judge_configuration.judges - - # Start all judge evaluations as tasks - async def evaluate_judge(judge_config): - judge = self._judges.get(judge_config.key) - if not judge: - if self._logger: - self._logger.warn( - f"Judge configuration is not enabled: {judge_config.key}", - ) - return None - - eval_result = await judge.evaluate_messages( - messages, response, judge_config.sampling_rate - ) - - if eval_result and eval_result.success: - self._tracker.track_eval_scores(eval_result.evals) - - return eval_result - - # Create tasks for each judge evaluation - tasks = [ - asyncio.create_task(evaluate_judge(judge_config)) - for judge_config in judge_configs - ] - - return tasks - - def get_config(self) -> AICompletionConfig: - """ - Get the underlying AI configuration used to initialize this TrackedChat. - - :return: The AI completion configuration - """ - return self._ai_config - - def get_tracker(self) -> LDAIConfigTracker: - """ - Get the underlying AI configuration tracker used to initialize this TrackedChat. - - :return: The tracker instance - """ - return self._tracker - - def get_provider(self) -> AIProvider: - """ - Get the underlying AI provider instance. - - This provides direct access to the provider for advanced use cases. - - :return: The AI provider instance - """ - return self._provider - - def get_judges(self) -> Dict[str, AIJudge]: - """ - Get the judges associated with this TrackedChat. - - Returns a dictionary of judge instances keyed by their configuration keys. 
- - :return: Dictionary of judge instances - """ - return self._judges - - def append_messages(self, messages: List[LDMessage]) -> None: - """ - Append messages to the conversation history. - - Adds messages to the conversation history without invoking the model, - which is useful for managing multi-turn conversations or injecting context. - - :param messages: Array of messages to append to the conversation history - """ - self._messages.extend(messages) - - def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: - """ - Get all messages in the conversation history. - - :param include_config_messages: Whether to include the config messages from the AIConfig. - Defaults to False. - :return: Array of messages. When include_config_messages is True, returns both config - messages and conversation history with config messages prepended. When False, - returns only the conversation history messages. - """ - if include_config_messages: - config_messages = self._ai_config.messages or [] - return config_messages + self._messages - return list(self._messages) +__all__ = ['TrackedChat'] diff --git a/ldai/chat/tracked_chat.py b/ldai/chat/tracked_chat.py new file mode 100644 index 0000000..037fd9a --- /dev/null +++ b/ldai/chat/tracked_chat.py @@ -0,0 +1,186 @@ +"""TrackedChat implementation for managing AI chat conversations.""" + +import asyncio +from typing import Any, Dict, List, Optional + +from ldai.models import AICompletionConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, JudgeResponse +from ldai.judge import AIJudge +from ldai.tracker import LDAIConfigTracker + + +class TrackedChat: + """ + Concrete implementation of TrackedChat that provides chat functionality + by delegating to an AIProvider implementation. + + This class handles conversation management and tracking, while delegating + the actual model invocation to the provider. 
+ """ + + def __init__( + self, + ai_config: AICompletionConfig, + tracker: LDAIConfigTracker, + provider: AIProvider, + judges: Optional[Dict[str, AIJudge]] = None, + logger: Optional[Any] = None, + ): + """ + Initialize the TrackedChat. + + :param ai_config: The completion AI configuration + :param tracker: The tracker for the completion configuration + :param provider: The AI provider to use for chat + :param judges: Optional dictionary of judge instances keyed by their configuration keys + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._tracker = tracker + self._provider = provider + self._judges = judges or {} + self._logger = logger + self._messages: List[LDMessage] = [] + + async def invoke(self, prompt: str) -> ChatResponse: + """ + Invoke the chat model with a prompt string. + + This method handles conversation management and tracking, delegating to the provider's invoke_model method. + + :param prompt: The user prompt to send to the chat model + :return: ChatResponse containing the model's response and metrics + """ + # Convert prompt string to LDMessage with role 'user' and add to conversation history + user_message: LDMessage = LDMessage(role='user', content=prompt) + self._messages.append(user_message) + + # Prepend config messages to conversation history for model invocation + config_messages = self._ai_config.messages or [] + all_messages = config_messages + self._messages + + # Delegate to provider-specific implementation with tracking + response = await self._tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._provider.invoke_model(all_messages), + ) + + # Start judge evaluations as async tasks (don't await them) + judge_config = self._ai_config.judge_configuration + if judge_config and judge_config.judges and len(judge_config.judges) > 0: + evaluation_tasks = self._start_judge_evaluations(self._messages, response) + response.evaluations = evaluation_tasks + + # Add the response message 
to conversation history + self._messages.append(response.message) + return response + + def _start_judge_evaluations( + self, + messages: List[LDMessage], + response: ChatResponse, + ) -> List[asyncio.Task[Optional[JudgeResponse]]]: + """ + Start judge evaluations as async tasks without awaiting them. + + Returns a list of async tasks that can be awaited later. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :return: List of async tasks that will return judge evaluation results + """ + if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: + return [] + + judge_configs = self._ai_config.judge_configuration.judges + + # Start all judge evaluations as tasks + async def evaluate_judge(judge_config): + judge = self._judges.get(judge_config.key) + if not judge: + if self._logger: + self._logger.warn( + f"Judge configuration is not enabled: {judge_config.key}", + ) + return None + + eval_result = await judge.evaluate_messages( + messages, response, judge_config.sampling_rate + ) + + if eval_result and eval_result.success: + self._tracker.track_eval_scores(eval_result.evals) + + return eval_result + + # Create tasks for each judge evaluation + tasks = [ + asyncio.create_task(evaluate_judge(judge_config)) + for judge_config in judge_configs + ] + + return tasks + + def get_config(self) -> AICompletionConfig: + """ + Get the underlying AI configuration used to initialize this TrackedChat. + + :return: The AI completion configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Get the underlying AI configuration tracker used to initialize this TrackedChat. + + :return: The tracker instance + """ + return self._tracker + + def get_provider(self) -> AIProvider: + """ + Get the underlying AI provider instance. + + This provides direct access to the provider for advanced use cases. 
+ + :return: The AI provider instance + """ + return self._provider + + def get_judges(self) -> Dict[str, AIJudge]: + """ + Get the judges associated with this TrackedChat. + + Returns a dictionary of judge instances keyed by their configuration keys. + + :return: Dictionary of judge instances + """ + return self._judges + + def append_messages(self, messages: List[LDMessage]) -> None: + """ + Append messages to the conversation history. + + Adds messages to the conversation history without invoking the model, + which is useful for managing multi-turn conversations or injecting context. + + :param messages: Array of messages to append to the conversation history + """ + self._messages.extend(messages) + + def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: + """ + Get all messages in the conversation history. + + :param include_config_messages: Whether to include the config messages from the AIConfig. + Defaults to False. + :return: Array of messages. When include_config_messages is True, returns both config + messages and conversation history with config messages prepended. When False, + returns only the conversation history messages. 
+ """ + if include_config_messages: + config_messages = self._ai_config.messages or [] + return config_messages + self._messages + return list(self._messages) + diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 323cd19..4ab4df4 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -1,231 +1,7 @@ -"""Judge implementation for AI evaluation.""" +"""Judge module for LaunchDarkly AI SDK.""" -import random -from typing import Any, Dict, Optional +from ldai.judge.ai_judge import AIJudge -import chevron - -from ldai.models import AIJudgeConfig, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse -from ldai.tracker import LDAIConfigTracker -from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder - - -class AIJudge: - """ - Judge implementation that handles evaluation functionality and conversation management. - - According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate - other AI Configs using structured output. - """ - - def __init__( - self, - ai_config: AIJudgeConfig, - ai_config_tracker: LDAIConfigTracker, - ai_provider: AIProvider, - logger: Optional[Any] = None, - ): - """ - Initialize the Judge. - - :param ai_config: The judge AI configuration - :param ai_config_tracker: The tracker for the judge configuration - :param ai_provider: The AI provider to use for evaluation - :param logger: Optional logger for logging - """ - self._ai_config = ai_config - self._ai_config_tracker = ai_config_tracker - self._ai_provider = ai_provider - self._logger = logger - self._evaluation_response_structure = EvaluationSchemaBuilder.build( - ai_config.evaluation_metric_keys - ) - - async def evaluate( - self, - input_text: str, - output_text: str, - sampling_rate: float = 1.0, - ) -> Optional[JudgeResponse]: - """ - Evaluates an AI response using the judge's configuration. 
- - :param input_text: The input prompt or question that was provided to the AI - :param output_text: The AI-generated response to be evaluated - :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) - :return: Evaluation results or None if not sampled - """ - try: - if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: - if self._logger: - self._logger.warn( - 'Judge configuration is missing required evaluationMetricKeys' - ) - return None - - if not self._ai_config.messages: - if self._logger: - self._logger.warn('Judge configuration must include messages') - return None - - if random.random() > sampling_rate: - if self._logger: - self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') - return None - - messages = self._construct_evaluation_messages(input_text, output_text) - - # Track metrics of the structured model invocation - response = await self._ai_config_tracker.track_metrics_of( - lambda result: result.metrics, - lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) - ) - - success = response.metrics.success - - evals = self._parse_evaluation_response(response.data) - - if len(evals) != len(self._ai_config.evaluation_metric_keys): - if self._logger: - self._logger.warn('Judge evaluation did not return all evaluations') - success = False - - return JudgeResponse( - evals=evals, - success=success, - ) - except Exception as error: - if self._logger: - self._logger.error(f'Judge evaluation failed: {error}') - return JudgeResponse( - evals={}, - success=False, - error=str(error) if isinstance(error, Exception) else 'Unknown error', - ) - - async def evaluate_messages( - self, - messages: list[LDMessage], - response: ChatResponse, - sampling_ratio: float = 1.0, - ) -> Optional[JudgeResponse]: - """ - Evaluates an AI response from chat messages and response. 
- - :param messages: Array of messages representing the conversation history - :param response: The AI response to be evaluated - :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) - :return: Evaluation results or None if not sampled - """ - input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' - output_text = response.message.content - - return await self.evaluate(input_text, output_text, sampling_ratio) - - def get_ai_config(self) -> AIJudgeConfig: - """ - Returns the AI Config used by this judge. - - :return: The judge AI configuration - """ - return self._ai_config - - def get_tracker(self) -> LDAIConfigTracker: - """ - Returns the tracker associated with this judge. - - :return: The tracker for the judge configuration - """ - return self._ai_config_tracker - - def get_provider(self) -> AIProvider: - """ - Returns the AI provider used by this judge. - - :return: The AI provider - """ - return self._ai_provider - - def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: - """ - Constructs evaluation messages by combining judge's config messages with input/output. - - :param input_text: The input text - :param output_text: The output text to evaluate - :return: List of messages for evaluation - """ - if not self._ai_config.messages: - return [] - - messages: list[LDMessage] = [] - for msg in self._ai_config.messages: - # Interpolate message content with reserved variables - content = self._interpolate_message(msg.content, { - 'message_history': input_text, - 'response_to_evaluate': output_text, - }) - messages.append(LDMessage(role=msg.role, content=content)) - - return messages - - def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: - """ - Interpolates message content with variables using Mustache templating. 
- - :param content: The message content template - :param variables: Variables to interpolate - :return: Interpolated message content - """ - # Use chevron (Mustache) for templating, with no escaping - return chevron.render(content, variables) - - def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: - """ - Parses the structured evaluation response from the AI provider. - - :param data: The structured response data - :return: Dictionary of evaluation scores keyed by metric key - """ - results: Dict[str, EvalScore] = {} - - if not data.get('evaluations') or not isinstance(data['evaluations'], dict): - if self._logger: - self._logger.warn('Invalid response: missing or invalid evaluations object') - return results - - evaluations = data['evaluations'] - - for metric_key in self._ai_config.evaluation_metric_keys: - evaluation = evaluations.get(metric_key) - - if not evaluation or not isinstance(evaluation, dict): - if self._logger: - self._logger.warn(f'Missing evaluation for metric key: {metric_key}') - continue - - score = evaluation.get('score') - reasoning = evaluation.get('reasoning') - - if not isinstance(score, (int, float)) or score < 0 or score > 1: - if self._logger: - self._logger.warn( - f'Invalid score evaluated for {metric_key}: {score}. ' - 'Score must be a number between 0 and 1 inclusive' - ) - continue - - if not isinstance(reasoning, str): - if self._logger: - self._logger.warn( - f'Invalid reasoning evaluated for {metric_key}: {reasoning}. 
' - 'Reasoning must be a string' - ) - continue - - results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) - - return results +__all__ = ['AIJudge'] diff --git a/ldai/judge/ai_judge.py b/ldai/judge/ai_judge.py new file mode 100644 index 0000000..20efbf8 --- /dev/null +++ b/ldai/judge/ai_judge.py @@ -0,0 +1,230 @@ +"""Judge implementation for AI evaluation.""" + +import random +from typing import Any, Dict, Optional + +import chevron + +from ldai.models import AIJudgeConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.tracker import LDAIConfigTracker +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder + + +class AIJudge: + """ + Judge implementation that handles evaluation functionality and conversation management. + + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate + other AI Configs using structured output. + """ + + def __init__( + self, + ai_config: AIJudgeConfig, + ai_config_tracker: LDAIConfigTracker, + ai_provider: AIProvider, + logger: Optional[Any] = None, + ): + """ + Initialize the Judge. + + :param ai_config: The judge AI configuration + :param ai_config_tracker: The tracker for the judge configuration + :param ai_provider: The AI provider to use for evaluation + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._ai_config_tracker = ai_config_tracker + self._ai_provider = ai_provider + self._logger = logger + self._evaluation_response_structure = EvaluationSchemaBuilder.build( + ai_config.evaluation_metric_keys + ) + + async def evaluate( + self, + input_text: str, + output_text: str, + sampling_rate: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response using the judge's configuration. 
+ + :param input_text: The input prompt or question that was provided to the AI + :param output_text: The AI-generated response to be evaluated + :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + try: + if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: + if self._logger: + self._logger.warn( + 'Judge configuration is missing required evaluationMetricKeys' + ) + return None + + if not self._ai_config.messages: + if self._logger: + self._logger.warn('Judge configuration must include messages') + return None + + if random.random() > sampling_rate: + if self._logger: + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + return None + + messages = self._construct_evaluation_messages(input_text, output_text) + + # Track metrics of the structured model invocation + response = await self._ai_config_tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) + ) + + success = response.metrics.success + + evals = self._parse_evaluation_response(response.data) + + if len(evals) != len(self._ai_config.evaluation_metric_keys): + if self._logger: + self._logger.warn('Judge evaluation did not return all evaluations') + success = False + + return JudgeResponse( + evals=evals, + success=success, + ) + except Exception as error: + if self._logger: + self._logger.error(f'Judge evaluation failed: {error}') + return JudgeResponse( + evals={}, + success=False, + error=str(error) if isinstance(error, Exception) else 'Unknown error', + ) + + async def evaluate_messages( + self, + messages: list[LDMessage], + response: ChatResponse, + sampling_ratio: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response from chat messages and response. 
+ + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' + output_text = response.message.content + + return await self.evaluate(input_text, output_text, sampling_ratio) + + def get_ai_config(self) -> AIJudgeConfig: + """ + Returns the AI Config used by this judge. + + :return: The judge AI configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Returns the tracker associated with this judge. + + :return: The tracker for the judge configuration + """ + return self._ai_config_tracker + + def get_provider(self) -> AIProvider: + """ + Returns the AI provider used by this judge. + + :return: The AI provider + """ + return self._ai_provider + + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + """ + Constructs evaluation messages by combining judge's config messages with input/output. + + :param input_text: The input text + :param output_text: The output text to evaluate + :return: List of messages for evaluation + """ + if not self._ai_config.messages: + return [] + + messages: list[LDMessage] = [] + for msg in self._ai_config.messages: + # Interpolate message content with reserved variables + content = self._interpolate_message(msg.content, { + 'message_history': input_text, + 'response_to_evaluate': output_text, + }) + messages.append(LDMessage(role=msg.role, content=content)) + + return messages + + def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: + """ + Interpolates message content with variables using Mustache templating. 
+ + :param content: The message content template + :param variables: Variables to interpolate + :return: Interpolated message content + """ + # Use chevron (Mustache) for templating, with no escaping + return chevron.render(content, variables) + + def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: + """ + Parses the structured evaluation response from the AI provider. + + :param data: The structured response data + :return: Dictionary of evaluation scores keyed by metric key + """ + results: Dict[str, EvalScore] = {} + + if not data.get('evaluations') or not isinstance(data['evaluations'], dict): + if self._logger: + self._logger.warn('Invalid response: missing or invalid evaluations object') + return results + + evaluations = data['evaluations'] + + for metric_key in self._ai_config.evaluation_metric_keys: + evaluation = evaluations.get(metric_key) + + if not evaluation or not isinstance(evaluation, dict): + if self._logger: + self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + continue + + score = evaluation.get('score') + reasoning = evaluation.get('reasoning') + + if not isinstance(score, (int, float)) or score < 0 or score > 1: + if self._logger: + self._logger.warn( + f'Invalid score evaluated for {metric_key}: {score}. ' + 'Score must be a number between 0 and 1 inclusive' + ) + continue + + if not isinstance(reasoning, str): + if self._logger: + self._logger.warn( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. 
' + 'Reasoning must be a string' + ) + continue + + results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) + + return results + From f3c792dbd75d3a84eb74165e4489b8138dbfb95d Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 18 Nov 2025 16:15:39 +0100 Subject: [PATCH 16/37] add tests for LangChainProvider --- ldai/testing/test_langchain_provider.py | 237 ++++++++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 238 insertions(+) create mode 100644 ldai/testing/test_langchain_provider.py diff --git a/ldai/testing/test_langchain_provider.py b/ldai/testing/test_langchain_provider.py new file mode 100644 index 0000000..3bb83a1 --- /dev/null +++ b/ldai/testing/test_langchain_provider.py @@ -0,0 +1,237 @@ +"""Tests for LangChain provider implementation.""" + +import pytest +from unittest.mock import AsyncMock, Mock + +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + +from ldai.models import LDMessage +from ldai.providers.langchain import LangChainProvider +from ldai.tracker import TokenUsage + + +class TestMessageConversion: + """Test conversion between LD messages and LangChain messages.""" + + def test_convert_multiple_messages(self): + """Test converting a conversation with all message types.""" + ld_messages = [ + LDMessage(role='system', content='You are helpful'), + LDMessage(role='user', content='Hello'), + LDMessage(role='assistant', content='Hi there!'), + ] + lc_messages = LangChainProvider.convert_messages_to_langchain(ld_messages) + + assert len(lc_messages) == 3 + assert isinstance(lc_messages[0], SystemMessage) + assert isinstance(lc_messages[1], HumanMessage) + assert isinstance(lc_messages[2], AIMessage) + assert lc_messages[0].content == 'You are helpful' + assert lc_messages[1].content == 'Hello' + assert lc_messages[2].content == 'Hi there!' 
+ + def test_convert_unsupported_role_raises_error(self): + """Test that unsupported message roles raise ValueError.""" + ld_messages = [LDMessage(role='function', content='Function result')] + + with pytest.raises(ValueError, match='Unsupported message role: function'): + LangChainProvider.convert_messages_to_langchain(ld_messages) + + +class TestMetricsExtraction: + """Test metrics extraction from LangChain response metadata.""" + + def test_extract_metrics_with_token_usage(self): + """Test extracting token usage from response metadata.""" + response = AIMessage( + content='Hello, world!', + response_metadata={ + 'token_usage': { + 'total_tokens': 100, + 'prompt_tokens': 60, + 'completion_tokens': 40, + } + } + ) + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is not None + assert metrics.usage.total == 100 + assert metrics.usage.input == 60 + assert metrics.usage.output == 40 + + def test_extract_metrics_with_camel_case_token_usage(self): + """Test extracting token usage with camelCase keys (some providers use this).""" + response = AIMessage( + content='Hello, world!', + response_metadata={ + 'token_usage': { + 'totalTokens': 150, + 'promptTokens': 90, + 'completionTokens': 60, + } + } + ) + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is not None + assert metrics.usage.total == 150 + assert metrics.usage.input == 90 + assert metrics.usage.output == 60 + + def test_extract_metrics_without_token_usage(self): + """Test metrics extraction when no token usage is available.""" + response = AIMessage(content='Hello, world!') + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is None + + +class TestInvokeModel: + """Test model invocation with LangChain provider.""" + + @pytest.mark.asyncio + async def 
test_invoke_model_success(self): + """Test successful model invocation.""" + mock_llm = AsyncMock() + mock_response = AIMessage( + content='Hello, user!', + response_metadata={ + 'token_usage': { + 'total_tokens': 20, + 'prompt_tokens': 10, + 'completion_tokens': 10, + } + } + ) + mock_llm.ainvoke.return_value = mock_response + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='Hello')] + + response = await provider.invoke_model(messages) + + assert response.message.role == 'assistant' + assert response.message.content == 'Hello, user!' + assert response.metrics.success is True + assert response.metrics.usage is not None + assert response.metrics.usage.total == 20 + + @pytest.mark.asyncio + async def test_invoke_model_with_multimodal_content_warning(self): + """Test that non-string content triggers warning and marks as failure.""" + mock_llm = AsyncMock() + mock_response = AIMessage( + content=['text', {'type': 'image'}], # Non-string content + response_metadata={'token_usage': {'total_tokens': 20}} + ) + mock_llm.ainvoke.return_value = mock_response + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Describe this image')] + + response = await provider.invoke_model(messages) + + # Should warn about multimodal content + mock_logger.warn.assert_called_once() + assert 'Multimodal response not supported' in str(mock_logger.warn.call_args) + + # Should mark as failure + assert response.metrics.success is False + assert response.message.content == '' + + @pytest.mark.asyncio + async def test_invoke_model_with_exception(self): + """Test model invocation handles exceptions gracefully.""" + mock_llm = AsyncMock() + mock_llm.ainvoke.side_effect = Exception('Model API error') + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Hello')] + + response = await 
provider.invoke_model(messages) + + # Should log the error + mock_logger.warn.assert_called_once() + assert 'LangChain model invocation failed' in str(mock_logger.warn.call_args) + + # Should return failure response + assert response.message.role == 'assistant' + assert response.message.content == '' + assert response.metrics.success is False + assert response.metrics.usage is None + + +class TestInvokeStructuredModel: + """Test structured output invocation.""" + + @pytest.mark.asyncio + async def test_invoke_structured_model_with_support(self): + """Test structured output when model supports with_structured_output.""" + mock_llm = Mock() + mock_structured_llm = AsyncMock() + mock_structured_llm.ainvoke.return_value = { + 'answer': 'Paris', + 'confidence': 0.95 + } + mock_llm.with_structured_output.return_value = mock_structured_llm + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='What is the capital of France?')] + schema = {'answer': 'string', 'confidence': 'number'} + + response = await provider.invoke_structured_model(messages, schema) + + assert response.data == {'answer': 'Paris', 'confidence': 0.95} + assert response.metrics.success is True + mock_llm.with_structured_output.assert_called_once_with(schema) + + @pytest.mark.asyncio + async def test_invoke_structured_model_without_support_json_fallback(self): + """Test structured output fallback to JSON parsing when not supported.""" + mock_llm = AsyncMock() + # Model doesn't have with_structured_output + delattr(mock_llm, 'with_structured_output') if hasattr(mock_llm, 'with_structured_output') else None + + mock_response = AIMessage(content='{"answer": "Berlin", "confidence": 0.9}') + mock_llm.ainvoke.return_value = mock_response + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='What is the capital of Germany?')] + schema = {'answer': 'string', 'confidence': 'number'} + + response = await provider.invoke_structured_model(messages, 
schema) + + assert response.data == {'answer': 'Berlin', 'confidence': 0.9} + assert response.metrics.success is True + + @pytest.mark.asyncio + async def test_invoke_structured_model_with_exception(self): + """Test structured output handles exceptions gracefully.""" + mock_llm = Mock() + mock_llm.with_structured_output.side_effect = Exception('Structured output error') + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Question')] + schema = {'answer': 'string'} + + response = await provider.invoke_structured_model(messages, schema) + + # Should log the error + mock_logger.warn.assert_called_once() + assert 'LangChain structured model invocation failed' in str(mock_logger.warn.call_args) + + # Should return failure response + assert response.data == {} + assert response.raw_response == '' + assert response.metrics.success is False + diff --git a/pyproject.toml b/pyproject.toml index 200215c..9c1f44a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ chevron = "=0.14.0" pytest = ">=2.8" pytest-cov = ">=2.4.0" pytest-mypy = "==1.0.1" +pytest-asyncio = ">=0.21.0" mypy = "==1.18.2" pycodestyle = "^2.12.1" isort = ">=5.13.2,<7.0.0" From 43d2b5421729682811bc305ef0e417a90b9c6465 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 18 Nov 2025 16:16:48 +0100 Subject: [PATCH 17/37] Refactor AI configuration tests to use AICompletionConfigDefault and update async tracking for OpenAI metrics --- ldai/providers/ai_provider_factory.py | 2 ++ ldai/testing/test_model_config.py | 46 +++++++++++++-------------- ldai/testing/test_tracker.py | 15 ++++++--- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index 41cc1c2..bd17485 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -113,6 +113,8 @@ async def _try_create_provider( ) return None + # 
TODO: REL-10773 OpenAI provider + # TODO: REL-10776 Vercel provider # For future external providers, use dynamic import provider_mappings = { # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index b35389d..d556c10 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,7 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import AICompletionConfigDefault, LDAIClient, LDMessage, ModelConfig @pytest.fixture @@ -133,14 +133,14 @@ def test_model_config_handles_custom(): def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel', parameters={'temperature': 0.5, 'maxTokens': 4096}), messages=[LDMessage(role='system', content='Hello, {{name}}!')], ) variables = {'name': 'World'} - config, _ = ldai_client.config('missing-flag', context, default_value, variables) + config = ldai_client.config('missing-flag', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -155,14 +155,14 @@ def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): def test_model_config_interpolation(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel'), messages=[LDMessage(role='system', content='Hello, {{name}}!')], ) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -177,9 +177,9 @@ def 
test_model_config_interpolation(ldai_client: LDAIClient): def test_model_config_no_variables(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('model-config', context, default_value, {}) + config = ldai_client.config('model-config', context, default_value, {}) assert config.messages is not None assert len(config.messages) > 0 @@ -194,10 +194,10 @@ def test_model_config_no_variables(ldai_client: LDAIClient): def test_provider_config_handling(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.provider is not None assert config.provider.name == 'fakeProvider' @@ -205,10 +205,10 @@ def test_provider_config_handling(ldai_client: LDAIClient): def test_context_interpolation(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").set('last', 'Beaches').build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'ctx-interpolation', context, default_value, variables ) @@ -228,10 +228,10 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): user_context = Context.builder('user-key').name("Sandy").build() org_context = 
Context.builder('org-key').kind('org').name("LaunchDarkly").set('shortname', 'LD').build() context = Context.multi_builder().add(user_context).add(org_context).build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multi-ctx-interpolation', context, default_value, variables ) @@ -249,10 +249,10 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): def test_model_config_multiple(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World', 'day': 'Monday'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multiple-messages', context, default_value, variables ) @@ -270,9 +270,9 @@ def test_model_config_multiple(ldai_client: LDAIClient): def test_model_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('off-config', context, default_value, {}) + config = ldai_client.config('off-config', context, default_value, {}) assert config.model is not None assert config.enabled is False @@ -283,9 +283,9 @@ def test_model_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), 
messages=[]) - config, _ = ldai_client.config('initial-config-disabled', context, default_value, {}) + config = ldai_client.config('initial-config-disabled', context, default_value, {}) assert config.enabled is False assert config.model is None @@ -295,9 +295,9 @@ def test_model_initial_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_enabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-enabled', context, default_value, {}) + config = ldai_client.config('initial-config-enabled', context, default_value, {}) assert config.enabled is True assert config.model is None @@ -318,9 +318,9 @@ def test_config_method_tracking(ldai_client: LDAIClient): client = LDAIClient(mock_client) context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, tracker = client.config('test-config-key', context, default_value) + config = client.config('test-config-key', context, default_value) mock_client.track.assert_called_once_with( '$ld:ai:config:function:single', diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py index 19c8161..2e39d98 100644 --- a/ldai/testing/test_tracker.py +++ b/ldai/testing/test_tracker.py @@ -276,7 +276,8 @@ def test_tracks_bedrock_metrics_with_error(client: LDClient): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", 
context) @@ -292,7 +293,10 @@ def to_dict(self): "completion_tokens": 110, } - tracker.track_openai_metrics(lambda: Result()) + async def get_result(): + return Result() + + await tracker.track_openai_metrics(get_result) calls = [ call( @@ -326,15 +330,16 @@ def to_dict(self): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics_with_exception(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics_with_exception(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - def raise_exception(): + async def raise_exception(): raise ValueError("Something went wrong") try: - tracker.track_openai_metrics(raise_exception) + await tracker.track_openai_metrics(raise_exception) assert False, "Should have thrown an exception" except ValueError: pass From be1443efb9597734e9446bfedc822d450498a2fb Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 09:11:26 +0100 Subject: [PATCH 18/37] Move to packages folder --- .github/actions/build/action.yml | 8 +- .github/workflows/manual-publish.yml | 60 +- .github/workflows/release-please.yml | 77 ++- .release-please-manifest.json | 3 +- Makefile | 58 +- README.md | 139 ++++- packages/core/LICENSE.txt | 13 + packages/core/PROVENANCE.md | 45 ++ packages/core/README.md | 41 ++ packages/core/ldai/__init__.py | 66 ++ packages/core/ldai/chat/__init__.py | 6 + packages/core/ldai/chat/tracked_chat.py | 186 ++++++ packages/core/ldai/client.py | 578 ++++++++++++++++++ packages/core/ldai/judge/__init__.py | 7 + packages/core/ldai/judge/ai_judge.py | 230 +++++++ .../ldai/judge/evaluation_schema_builder.py | 75 +++ packages/core/ldai/models.py | 363 +++++++++++ packages/core/ldai/providers/ai_provider.py | 91 +++ .../ldai/providers/ai_provider_factory.py | 169 +++++ packages/core/ldai/providers/types.py | 92 +++ packages/core/ldai/tracker.py | 404 ++++++++++++ 
packages/core/pyproject.toml | 69 +++ packages/core/tests/__init__.py | 2 + packages/core/tests/test_agents.py | 342 +++++++++++ packages/core/tests/test_model_config.py | 330 ++++++++++ packages/core/tests/test_tracker.py | 444 ++++++++++++++ packages/langchain/README.md | 58 ++ .../ldai/providers/langchain/__init__.py | 284 +++++++++ packages/langchain/pyproject.toml | 40 ++ packages/langchain/tests/__init__.py | 2 + .../tests/test_langchain_provider.py | 237 +++++++ release-please-config.json | 18 +- 32 files changed, 4456 insertions(+), 81 deletions(-) create mode 100644 packages/core/LICENSE.txt create mode 100644 packages/core/PROVENANCE.md create mode 100644 packages/core/README.md create mode 100644 packages/core/ldai/__init__.py create mode 100644 packages/core/ldai/chat/__init__.py create mode 100644 packages/core/ldai/chat/tracked_chat.py create mode 100644 packages/core/ldai/client.py create mode 100644 packages/core/ldai/judge/__init__.py create mode 100644 packages/core/ldai/judge/ai_judge.py create mode 100644 packages/core/ldai/judge/evaluation_schema_builder.py create mode 100644 packages/core/ldai/models.py create mode 100644 packages/core/ldai/providers/ai_provider.py create mode 100644 packages/core/ldai/providers/ai_provider_factory.py create mode 100644 packages/core/ldai/providers/types.py create mode 100644 packages/core/ldai/tracker.py create mode 100644 packages/core/pyproject.toml create mode 100644 packages/core/tests/__init__.py create mode 100644 packages/core/tests/test_agents.py create mode 100644 packages/core/tests/test_model_config.py create mode 100644 packages/core/tests/test_tracker.py create mode 100644 packages/langchain/README.md create mode 100644 packages/langchain/ldai/providers/langchain/__init__.py create mode 100644 packages/langchain/pyproject.toml create mode 100644 packages/langchain/tests/__init__.py create mode 100644 packages/langchain/tests/test_langchain_provider.py diff --git a/.github/actions/build/action.yml 
b/.github/actions/build/action.yml index 6761d0d..190ae60 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -1,5 +1,10 @@ name: Build distribution files description: 'Build distribution files' +inputs: + package-path: + description: 'Path to the package to build' + required: false + default: '.' outputs: package-hashes: description: "base64-encoded sha256 hashes of distribution files" @@ -10,10 +15,11 @@ runs: steps: - name: Build distribution files shell: bash + working-directory: ${{ inputs.package-path }} run: poetry build - name: Hash build files for provenance id: package-hashes shell: bash - working-directory: ./dist + working-directory: ${{ inputs.package-path }}/dist run: | echo "package-hashes=$(sha256sum * | base64 -w0)" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/manual-publish.yml b/.github/workflows/manual-publish.yml index 9b35bb2..237c97d 100644 --- a/.github/workflows/manual-publish.yml +++ b/.github/workflows/manual-publish.yml @@ -2,20 +2,26 @@ name: Publish Package on: workflow_dispatch: inputs: + package: + description: 'Which package to publish' + required: true + type: choice + options: + - core + - langchain + - both dry_run: description: 'Is this a dry run? If so no package will be published.' type: boolean required: true jobs: - build-publish: + publish-core: + if: ${{ inputs.package == 'core' || inputs.package == 'both' }} runs-on: ubuntu-latest - # Needed to get tokens during publishing. 
permissions: id-token: write contents: read - outputs: - package-hashes: ${{ steps.build.outputs.package-hashes}} steps: - uses: actions/checkout@v4 @@ -34,20 +40,46 @@ jobs: - uses: ./.github/actions/build id: build + with: + package-path: packages/core - - name: Publish package distributions to PyPI + - name: Publish core package to PyPI if: ${{ inputs.dry_run == false }} uses: pypa/gh-action-pypi-publish@release/v1 with: - password: ${{env.PYPI_AUTH_TOKEN}} + password: ${{ env.PYPI_AUTH_TOKEN }} + packages-dir: packages/core/dist/ - release-provenance: - needs: [ 'build-publish' ] + publish-langchain: + if: ${{ inputs.package == 'langchain' || inputs.package == 'both' }} + runs-on: ubuntu-latest permissions: - actions: read id-token: write - contents: write - uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0 - with: - base64-subjects: "${{ needs.build-publish.outputs.package-hashes }}" - upload-assets: ${{ !inputs.dry_run }} + contents: read + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: 3.9 + + - name: Install poetry + uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 + + - uses: launchdarkly/gh-actions/actions/release-secrets@release-secrets-v1.2.0 + name: 'Get PyPI token' + with: + aws_assume_role: ${{ vars.AWS_ROLE_ARN }} + ssm_parameter_pairs: '/production/common/releasing/pypi/token = PYPI_AUTH_TOKEN' + + - uses: ./.github/actions/build + id: build + with: + package-path: packages/langchain + + - name: Publish langchain package to PyPI + if: ${{ inputs.dry_run == false }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ env.PYPI_AUTH_TOKEN }} + packages-dir: packages/langchain/dist/ diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 7176356..5acd354 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -5,36 +5,40 @@ on: branches: [ 
main ] jobs: - release-package: + release-please: runs-on: ubuntu-latest permissions: - id-token: write # Needed if using OIDC to get release secrets. - contents: write # Contents and pull-requests are for release-please to make releases. + contents: write pull-requests: write outputs: - release-created: ${{ steps.release.outputs.release_created }} - upload-tag-name: ${{ steps.release.outputs.tag_name }} - package-hashes: ${{ steps.build.outputs.package-hashes}} + releases_created: ${{ steps.release.outputs.releases_created }} + core_release_created: ${{ steps.release.outputs['packages/core--release_created'] }} + langchain_release_created: ${{ steps.release.outputs['packages/langchain--release_created'] }} steps: - uses: googleapis/release-please-action@v4 id: release + with: + config-file: release-please-config.json + manifest-file: .release-please-manifest.json + release-core: + needs: release-please + if: ${{ needs.release-please.outputs.core_release_created == 'true' }} + runs-on: ubuntu-latest + permissions: + id-token: write + contents: write + steps: - uses: actions/checkout@v4 - if: ${{ steps.release.outputs.releases_created == 'true' }} - with: - fetch-depth: 0 # If you only need the current version keep this. 
- uses: actions/setup-python@v5 - if: ${{ steps.release.outputs.releases_created == 'true' }} with: python-version: 3.9 - name: Install poetry - if: ${{ steps.release.outputs.releases_created == 'true' }} uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 - uses: launchdarkly/gh-actions/actions/release-secrets@release-secrets-v1.2.0 - if: ${{ steps.release.outputs.releases_created == 'true' }} name: 'Get PyPI token' with: aws_assume_role: ${{ vars.AWS_ROLE_ARN }} @@ -42,26 +46,47 @@ jobs: - uses: ./.github/actions/build id: build - if: ${{ steps.release.outputs.releases_created == 'true' }} + with: + package-path: packages/core - uses: ./.github/actions/build-docs - if: ${{ steps.release.outputs.releases_created == 'true' }} - - name: Publish package distributions to PyPI - if: ${{ steps.release.outputs.releases_created == 'true' }} + - name: Publish core package to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - password: ${{env.PYPI_AUTH_TOKEN}} + password: ${{ env.PYPI_AUTH_TOKEN }} + packages-dir: packages/core/dist/ - release-provenance: - needs: [ 'release-package' ] - if: ${{ needs.release-package.outputs.release-created == 'true' }} + release-langchain: + needs: release-please + if: ${{ needs.release-please.outputs.langchain_release_created == 'true' }} + runs-on: ubuntu-latest permissions: - actions: read id-token: write contents: write - uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0 - with: - base64-subjects: "${{ needs.release-package.outputs.package-hashes }}" - upload-assets: true - upload-tag-name: ${{ needs.release-package.outputs.upload-tag-name }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: 3.9 + + - name: Install poetry + uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 + + - uses: launchdarkly/gh-actions/actions/release-secrets@release-secrets-v1.2.0 + name: 'Get PyPI token' + with: + 
aws_assume_role: ${{ vars.AWS_ROLE_ARN }} + ssm_parameter_pairs: '/production/common/releasing/pypi/token = PYPI_AUTH_TOKEN' + + - uses: ./.github/actions/build + id: build + with: + package-path: packages/langchain + + - name: Publish langchain package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ env.PYPI_AUTH_TOKEN }} + packages-dir: packages/langchain/dist/ diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 30b6d45..d41663b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,4 @@ { - ".": "0.10.1" + "packages/core": "0.10.1", + "packages/langchain": "0.1.0" } diff --git a/Makefile b/Makefile index 791925c..eb55373 100644 --- a/Makefile +++ b/Makefile @@ -14,24 +14,60 @@ help: #! Show this help message @grep -h -F '#!' $(MAKEFILE_LIST) | grep -v grep | sed 's/:.*#!/:/' | column -t -s":" .PHONY: install -install: - @poetry install +install: #! Install all packages + @echo "Installing core package..." + @cd packages/core && poetry install + @echo "Installing langchain package..." + @cd packages/langchain && poetry install + +.PHONY: install-core +install-core: #! Install core package only + @cd packages/core && poetry install + +.PHONY: install-langchain +install-langchain: #! Install langchain package only + @cd packages/langchain && poetry install # # Quality control checks # .PHONY: test -test: #! Run unit tests -test: install - @poetry run pytest $(PYTEST_FLAGS) +test: #! Run unit tests for all packages + @echo "Testing core package..." + @cd packages/core && poetry run pytest $(PYTEST_FLAGS) + @echo "Testing langchain package..." + @cd packages/langchain && poetry run pytest $(PYTEST_FLAGS) + +.PHONY: test-core +test-core: #! Run unit tests for core package + @cd packages/core && poetry run pytest $(PYTEST_FLAGS) + +.PHONY: test-langchain +test-langchain: #! 
Run unit tests for langchain package + @cd packages/langchain && poetry run pytest $(PYTEST_FLAGS) .PHONY: lint lint: #! Run type analysis and linting checks -lint: install - @poetry run mypy ldai - @poetry run isort --check --atomic ldai - @poetry run pycodestyle ldai + @echo "Linting core package..." + @cd packages/core && poetry run mypy ldai + @cd packages/core && poetry run isort --check --atomic ldai + @cd packages/core && poetry run pycodestyle ldai + +.PHONY: build +build: #! Build all packages + @echo "Building core package..." + @cd packages/core && poetry build + @echo "Building langchain package..." + @cd packages/langchain && poetry build + +.PHONY: build-core +build-core: #! Build core package + @cd packages/core && poetry build + +.PHONY: build-langchain +build-langchain: #! Build langchain package + @cd packages/langchain && poetry build # # Documentation generation @@ -39,6 +75,6 @@ lint: install .PHONY: docs docs: #! Generate sphinx-based documentation - @poetry install --with docs + @cd packages/core && poetry install --with docs @cd docs - @poetry run $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @cd packages/core && poetry run $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/README.md b/README.md index 0411abe..ea5b04e 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,130 @@ -# LaunchDarkly Server-side AI library for Python +# LaunchDarkly Server-side AI SDK for Python - Monorepo -## LaunchDarkly overview +This repository contains the LaunchDarkly AI SDK for Python and its provider packages. -[LaunchDarkly](https://www.launchdarkly.com) is a feature management platform that serves trillions of feature flags daily to help teams build better software, faster. [Get started](https://docs.launchdarkly.com/home/getting-started) using LaunchDarkly today! 
+## Packages -[![Twitter Follow](https://img.shields.io/twitter/follow/launchdarkly.svg?style=social&label=Follow&maxAge=2592000)](https://twitter.com/intent/follow?screen_name=launchdarkly) +### Core SDK +**Package:** [`launchdarkly-server-sdk-ai`](./packages/core/) +**PyPI:** https://pypi.org/project/launchdarkly-server-sdk-ai/ -## Supported Python versions +The core LaunchDarkly AI SDK providing: +- AI configuration management +- Tracking and metrics +- Provider abstraction layer +- Chat management -This version of the library has a minimum Python version of 3.9. +```bash +pip install launchdarkly-server-sdk-ai +``` -## Getting started +### LangChain Provider +**Package:** [`launchdarkly-server-sdk-ai-langchain`](./packages/langchain/) +**PyPI:** https://pypi.org/project/launchdarkly-server-sdk-ai-langchain/ -Refer to the [SDK reference guide](https://docs.launchdarkly.com/sdk/ai/python) for instructions on getting started with using the SDK. +LangChain provider supporting multiple AI providers through LangChain's unified interface. -## Learn more +```bash +pip install launchdarkly-server-sdk-ai-langchain +``` -Read our [documentation](http://docs.launchdarkly.com) for in-depth instructions on configuring and using LaunchDarkly. You can also head straight to the [reference guide for the python SDK](http://docs.launchdarkly.com/docs/python-sdk-ai-reference). 
+## Installation + +### Basic Installation +```bash +# Install core SDK +pip install launchdarkly-server-sdk-ai + +# Install with LangChain provider +pip install launchdarkly-server-sdk-ai-langchain +``` + +### Development Installation +```bash +# Clone the repository +git clone https://github.com/launchdarkly/python-server-sdk-ai.git +cd python-server-sdk-ai + +# Install core package +cd packages/core +poetry install + +# Install langchain package (in separate terminal/session) +cd packages/langchain +poetry install +``` + +## Usage + +```python +from ldclient import init, Context +from ldai import init_ai + +# Initialize +ld_client = init('your-sdk-key') +ai_client = init_ai(ld_client) + +# Create a chat (automatically uses installed providers) +context = Context.create('user-key') +chat = await ai_client.create_chat('chat-config', context) + +if chat: + response = await chat.invoke('Hello!') + print(response.message.content) +``` + +## Documentation + +- [SDK Reference Guide](https://docs.launchdarkly.com/sdk/ai/python) +- [API Documentation](https://launchdarkly-python-sdk-ai.readthedocs.io/) +- [Core Package README](./packages/core/README.md) +- [LangChain Provider README](./packages/langchain/README.md) + +## Repository Structure + +``` +python-server-sdk-ai/ +├── packages/ +│ ├── core/ # Core SDK +│ │ ├── ldai/ # Main SDK code +│ │ ├── pyproject.toml +│ │ └── README.md +│ └── langchain/ # LangChain provider +│ ├── ldai/ +│ │ └── providers/ +│ │ └── langchain/ +│ ├── pyproject.toml +│ └── README.md +├── .github/ +│ └── workflows/ # CI/CD workflows +├── release-please-config.json # Multi-package release config +└── .release-please-manifest.json # Version tracking +``` + +## Publishing + +Each package is published independently to PyPI: +- Core: `launchdarkly-server-sdk-ai` +- LangChain: `launchdarkly-server-sdk-ai-langchain` + +Releases are managed automatically via Release Please when changes are merged to `main`. 
## Contributing -We encourage pull requests and other contributions from the community. Check out our [contributing guidelines](CONTRIBUTING.md) for instructions on how to contribute to this library. +See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. + +## Testing -## Verifying library build provenance with the SLSA framework +```bash +# Test core package +cd packages/core +poetry run pytest -LaunchDarkly uses the [SLSA framework](https://slsa.dev/spec/v1.0/about) (Supply-chain Levels for Software Artifacts) to help developers make their supply chain more secure by ensuring the authenticity and build integrity of our published library packages. To learn more, see the [provenance guide](PROVENANCE.md). +# Test langchain package +cd packages/langchain +poetry run pytest +``` -## About LaunchDarkly +## License -- LaunchDarkly is a continuous delivery platform that provides feature flags as a service and allows developers to iterate quickly and safely. We allow you to easily flag your features and manage them from the LaunchDarkly dashboard. With LaunchDarkly, you can: - - Roll out a new feature to a subset of your users (like a group of users who opt-in to a beta tester group), gathering feedback and bug reports from real-world use cases. - - Gradually roll out a feature to an increasing percentage of users, and track the effect that the feature has on key metrics (for instance, how likely is a user to complete a purchase if they have feature A versus feature B?). - - Turn off a feature that you realize is causing performance problems in production, without needing to re-deploy, or even restart the application with a changed configuration file. - - Grant access to certain features based on user attributes, like payment plan (eg: users on the ‘gold’ plan get access to more features than users in the ‘silver’ plan). Disable parts of your application to facilitate maintenance, without taking everything offline. 
-- LaunchDarkly provides feature flag SDKs for a wide variety of languages and technologies. Read [our documentation](https://docs.launchdarkly.com/sdk) for a complete list. -- Explore LaunchDarkly - - [launchdarkly.com](https://www.launchdarkly.com/ "LaunchDarkly Main Website") for more information - - [docs.launchdarkly.com](https://docs.launchdarkly.com/ "LaunchDarkly Documentation") for our documentation and SDK reference guides - - [apidocs.launchdarkly.com](https://apidocs.launchdarkly.com/ "LaunchDarkly API Documentation") for our API documentation - - [blog.launchdarkly.com](https://blog.launchdarkly.com/ "LaunchDarkly Blog Documentation") for the latest product updates +Apache-2.0. See [LICENSE.txt](LICENSE.txt) diff --git a/packages/core/LICENSE.txt b/packages/core/LICENSE.txt new file mode 100644 index 0000000..50add35 --- /dev/null +++ b/packages/core/LICENSE.txt @@ -0,0 +1,13 @@ +Copyright 2024 Catamorphic, Co. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/packages/core/PROVENANCE.md b/packages/core/PROVENANCE.md new file mode 100644 index 0000000..73d317c --- /dev/null +++ b/packages/core/PROVENANCE.md @@ -0,0 +1,45 @@ +## Verifying SDK build provenance with the SLSA framework + +LaunchDarkly uses the [SLSA framework](https://slsa.dev/spec/v1.0/about) (Supply-chain Levels for Software Artifacts) to help developers make their supply chain more secure by ensuring the authenticity and build integrity of our published SDK packages. 
+ +As part of [SLSA requirements for level 3 compliance](https://slsa.dev/spec/v1.0/requirements), LaunchDarkly publishes provenance about our SDK package builds using [GitHub's generic SLSA3 provenance generator](https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/generic/README.md#generation-of-slsa3-provenance-for-arbitrary-projects) for distribution alongside our packages. These attestations are available for download from the GitHub release page for the release version under Assets > `multiple.intoto.jsonl`. + +To verify SLSA provenance attestations, we recommend using [slsa-verifier](https://github.com/slsa-framework/slsa-verifier). Example usage for verifying a package is included below: + + + +``` +# Set the version of the library to verify +VERSION=0.10.1 +``` + + + +``` +# Download package from PyPi +$ pip download --only-binary=:all: launchdarkly-server-sdk-ai==${VERSION} + +# Download provenance from Github release into same directory +$ curl --location -O \ + https://github.com/launchdarkly/python-server-sdk-ai/releases/download/${VERSION}/multiple.intoto.jsonl + +# Run slsa-verifier to verify provenance against package artifacts +$ slsa-verifier verify-artifact \ +--provenance-path multiple.intoto.jsonl \ +--source-uri github.com/launchdarkly/python-server-sdk-ai \ +launchdarkly_server_sdk_ai-${VERSION}-py3-none-any.whl +``` + +Below is a sample of expected output. 
+ +``` +Verified signature against tlog entry index 150910243 at URL: https://rekor.sigstore.dev/api/v1/log/entries/108e9186e8c5677ab3f14fc82cd3deb769e07ef812cadda623c08c77d4e51fc03124ee7542c470a1 +Verified build using builder "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@refs/tags/v2.0.0" at commit 8e2d4094b4833d075e70dfce43bbc7176008c4a1 +Verifying artifact launchdarkly_server_sdk_ai-0.3.0-py3-none-any.whl: PASSED + +PASSED: SLSA verification passed +``` + +Alternatively, to verify the provenance manually, the SLSA framework specifies [recommendations for verifying build artifacts](https://slsa.dev/spec/v1.0/verifying-artifacts) in their documentation. + +**Note:** These instructions do not apply when building our libraries from source. diff --git a/packages/core/README.md b/packages/core/README.md new file mode 100644 index 0000000..0411abe --- /dev/null +++ b/packages/core/README.md @@ -0,0 +1,41 @@ +# LaunchDarkly Server-side AI library for Python + +## LaunchDarkly overview + +[LaunchDarkly](https://www.launchdarkly.com) is a feature management platform that serves trillions of feature flags daily to help teams build better software, faster. [Get started](https://docs.launchdarkly.com/home/getting-started) using LaunchDarkly today! + +[![Twitter Follow](https://img.shields.io/twitter/follow/launchdarkly.svg?style=social&label=Follow&maxAge=2592000)](https://twitter.com/intent/follow?screen_name=launchdarkly) + +## Supported Python versions + +This version of the library has a minimum Python version of 3.9. + +## Getting started + +Refer to the [SDK reference guide](https://docs.launchdarkly.com/sdk/ai/python) for instructions on getting started with using the SDK. + +## Learn more + +Read our [documentation](http://docs.launchdarkly.com) for in-depth instructions on configuring and using LaunchDarkly. 
You can also head straight to the [reference guide for the python SDK](http://docs.launchdarkly.com/docs/python-sdk-ai-reference). + +## Contributing + +We encourage pull requests and other contributions from the community. Check out our [contributing guidelines](CONTRIBUTING.md) for instructions on how to contribute to this library. + +## Verifying library build provenance with the SLSA framework + +LaunchDarkly uses the [SLSA framework](https://slsa.dev/spec/v1.0/about) (Supply-chain Levels for Software Artifacts) to help developers make their supply chain more secure by ensuring the authenticity and build integrity of our published library packages. To learn more, see the [provenance guide](PROVENANCE.md). + +## About LaunchDarkly + +- LaunchDarkly is a continuous delivery platform that provides feature flags as a service and allows developers to iterate quickly and safely. We allow you to easily flag your features and manage them from the LaunchDarkly dashboard. With LaunchDarkly, you can: + - Roll out a new feature to a subset of your users (like a group of users who opt-in to a beta tester group), gathering feedback and bug reports from real-world use cases. + - Gradually roll out a feature to an increasing percentage of users, and track the effect that the feature has on key metrics (for instance, how likely is a user to complete a purchase if they have feature A versus feature B?). + - Turn off a feature that you realize is causing performance problems in production, without needing to re-deploy, or even restart the application with a changed configuration file. + - Grant access to certain features based on user attributes, like payment plan (eg: users on the ‘gold’ plan get access to more features than users in the ‘silver’ plan). Disable parts of your application to facilitate maintenance, without taking everything offline. +- LaunchDarkly provides feature flag SDKs for a wide variety of languages and technologies. 
Read [our documentation](https://docs.launchdarkly.com/sdk) for a complete list. +- Explore LaunchDarkly + - [launchdarkly.com](https://www.launchdarkly.com/ "LaunchDarkly Main Website") for more information + - [docs.launchdarkly.com](https://docs.launchdarkly.com/ "LaunchDarkly Documentation") for our documentation and SDK reference guides + - [apidocs.launchdarkly.com](https://apidocs.launchdarkly.com/ "LaunchDarkly API Documentation") for our API documentation + - [blog.launchdarkly.com](https://blog.launchdarkly.com/ "LaunchDarkly Blog Documentation") for the latest product updates diff --git a/packages/core/ldai/__init__.py b/packages/core/ldai/__init__.py new file mode 100644 index 0000000..1363115 --- /dev/null +++ b/packages/core/ldai/__init__.py @@ -0,0 +1,66 @@ +__version__ = "0.10.1" # x-release-please-version + +# Extend __path__ to support namespace packages at the ldai level +# This allows provider packages (like launchdarkly-server-sdk-ai-langchain) +# to extend ldai.providers.* even though ldai itself has an __init__.py +import sys +from pkgutil import extend_path +__path__ = extend_path(__path__, __name__) + +# Export main client +from ldai.client import LDAIClient + +# Export models for convenience +from ldai.models import ( + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + JudgeConfiguration, + LDMessage, + ModelConfig, + ProviderConfig, + # Deprecated aliases for backward compatibility + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, +) + +# Export judge +from ldai.judge import AIJudge + +# Export chat +from ldai.chat import TrackedChat + +# Export judge types +from ldai.providers.types import EvalScore, JudgeResponse + +__all__ = [ + 'LDAIClient', + 'AIAgentConfig', + 'AIAgentConfigDefault', + 'AIAgentConfigRequest', + 'AIAgents', + 'AICompletionConfig', + 'AICompletionConfigDefault', + 'AIJudgeConfig', + 
'AIJudgeConfigDefault', + 'AIJudge', + 'TrackedChat', + 'EvalScore', + 'JudgeConfiguration', + 'JudgeResponse', + 'LDMessage', + 'ModelConfig', + 'ProviderConfig', + # Deprecated exports + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', +] diff --git a/packages/core/ldai/chat/__init__.py b/packages/core/ldai/chat/__init__.py new file mode 100644 index 0000000..ffef9ab --- /dev/null +++ b/packages/core/ldai/chat/__init__.py @@ -0,0 +1,6 @@ +"""Chat module for LaunchDarkly AI SDK.""" + +from ldai.chat.tracked_chat import TrackedChat + +__all__ = ['TrackedChat'] + diff --git a/packages/core/ldai/chat/tracked_chat.py b/packages/core/ldai/chat/tracked_chat.py new file mode 100644 index 0000000..037fd9a --- /dev/null +++ b/packages/core/ldai/chat/tracked_chat.py @@ -0,0 +1,186 @@ +"""TrackedChat implementation for managing AI chat conversations.""" + +import asyncio +from typing import Any, Dict, List, Optional + +from ldai.models import AICompletionConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, JudgeResponse +from ldai.judge import AIJudge +from ldai.tracker import LDAIConfigTracker + + +class TrackedChat: + """ + Concrete implementation of TrackedChat that provides chat functionality + by delegating to an AIProvider implementation. + + This class handles conversation management and tracking, while delegating + the actual model invocation to the provider. + """ + + def __init__( + self, + ai_config: AICompletionConfig, + tracker: LDAIConfigTracker, + provider: AIProvider, + judges: Optional[Dict[str, AIJudge]] = None, + logger: Optional[Any] = None, + ): + """ + Initialize the TrackedChat. 
+ + :param ai_config: The completion AI configuration + :param tracker: The tracker for the completion configuration + :param provider: The AI provider to use for chat + :param judges: Optional dictionary of judge instances keyed by their configuration keys + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._tracker = tracker + self._provider = provider + self._judges = judges or {} + self._logger = logger + self._messages: List[LDMessage] = [] + + async def invoke(self, prompt: str) -> ChatResponse: + """ + Invoke the chat model with a prompt string. + + This method handles conversation management and tracking, delegating to the provider's invoke_model method. + + :param prompt: The user prompt to send to the chat model + :return: ChatResponse containing the model's response and metrics + """ + # Convert prompt string to LDMessage with role 'user' and add to conversation history + user_message: LDMessage = LDMessage(role='user', content=prompt) + self._messages.append(user_message) + + # Prepend config messages to conversation history for model invocation + config_messages = self._ai_config.messages or [] + all_messages = config_messages + self._messages + + # Delegate to provider-specific implementation with tracking + response = await self._tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._provider.invoke_model(all_messages), + ) + + # Start judge evaluations as async tasks (don't await them) + judge_config = self._ai_config.judge_configuration + if judge_config and judge_config.judges and len(judge_config.judges) > 0: + evaluation_tasks = self._start_judge_evaluations(self._messages, response) + response.evaluations = evaluation_tasks + + # Add the response message to conversation history + self._messages.append(response.message) + return response + + def _start_judge_evaluations( + self, + messages: List[LDMessage], + response: ChatResponse, + ) -> List[asyncio.Task[Optional[JudgeResponse]]]: + """ 
+ Start judge evaluations as async tasks without awaiting them. + + Returns a list of async tasks that can be awaited later. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :return: List of async tasks that will return judge evaluation results + """ + if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: + return [] + + judge_configs = self._ai_config.judge_configuration.judges + + # Start all judge evaluations as tasks + async def evaluate_judge(judge_config): + judge = self._judges.get(judge_config.key) + if not judge: + if self._logger: + self._logger.warn( + f"Judge configuration is not enabled: {judge_config.key}", + ) + return None + + eval_result = await judge.evaluate_messages( + messages, response, judge_config.sampling_rate + ) + + if eval_result and eval_result.success: + self._tracker.track_eval_scores(eval_result.evals) + + return eval_result + + # Create tasks for each judge evaluation + tasks = [ + asyncio.create_task(evaluate_judge(judge_config)) + for judge_config in judge_configs + ] + + return tasks + + def get_config(self) -> AICompletionConfig: + """ + Get the underlying AI configuration used to initialize this TrackedChat. + + :return: The AI completion configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Get the underlying AI configuration tracker used to initialize this TrackedChat. + + :return: The tracker instance + """ + return self._tracker + + def get_provider(self) -> AIProvider: + """ + Get the underlying AI provider instance. + + This provides direct access to the provider for advanced use cases. + + :return: The AI provider instance + """ + return self._provider + + def get_judges(self) -> Dict[str, AIJudge]: + """ + Get the judges associated with this TrackedChat. + + Returns a dictionary of judge instances keyed by their configuration keys. 
+ + :return: Dictionary of judge instances + """ + return self._judges + + def append_messages(self, messages: List[LDMessage]) -> None: + """ + Append messages to the conversation history. + + Adds messages to the conversation history without invoking the model, + which is useful for managing multi-turn conversations or injecting context. + + :param messages: Array of messages to append to the conversation history + """ + self._messages.extend(messages) + + def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: + """ + Get all messages in the conversation history. + + :param include_config_messages: Whether to include the config messages from the AIConfig. + Defaults to False. + :return: Array of messages. When include_config_messages is True, returns both config + messages and conversation history with config messages prepended. When False, + returns only the conversation history messages. + """ + if include_config_messages: + config_messages = self._ai_config.messages or [] + return config_messages + self._messages + return list(self._messages) + diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py new file mode 100644 index 0000000..4f4b6b9 --- /dev/null +++ b/packages/core/ldai/client.py @@ -0,0 +1,578 @@ +import logging +from typing import Any, Dict, List, Optional, Tuple + +import chevron +from ldclient import Context +from ldclient.client import LDClient + +from ldai.chat import TrackedChat +from ldai.judge import AIJudge +from ldai.models import ( + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + JudgeConfiguration, + LDMessage, + ModelConfig, + ProviderConfig, +) +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider +from ldai.tracker import LDAIConfigTracker + + +class LDAIClient: + """The LaunchDarkly AI SDK client object.""" + + def __init__(self, client: 
LDClient): + self._client = client + self._logger = logging.getLogger('ldclient.ai') + + def completion_config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: + """ + Get the value of a completion configuration. + + :param key: The key of the completion configuration. + :param context: The context to evaluate the completion configuration in. + :param default_value: The default value of the completion configuration. + :param variables: Additional variables for the completion configuration. + :return: The completion configuration with a tracker used for gathering metrics. + """ + self._client.track('$ld:ai:config:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) + + config = AICompletionConfig( + enabled=bool(enabled), + model=model, + messages=messages, + provider=provider, + tracker=tracker, + judge_configuration=judge_configuration, + ) + + return config + + def config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: + """ + Get the value of a model configuration. + + .. deprecated:: Use :meth:`completion_config` instead. This method will be removed in a future version. + + :param key: The key of the model configuration. + :param context: The context to evaluate the model configuration in. + :param default_value: The default value of the model configuration. + :param variables: Additional variables for the model configuration. + :return: The value of the model configuration along with a tracker used for gathering metrics. 
+ """ + return self.completion_config(key, context, default_value, variables) + + def judge_config( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIJudgeConfig: + """ + Get the value of a judge configuration. + + :param key: The key of the judge configuration. + :param context: The context to evaluate the judge configuration in. + :param default_value: The default value of the judge configuration. + :param variables: Additional variables for the judge configuration. + :return: The judge configuration with a tracker used for gathering metrics. + """ + self._client.track('$ld:ai:judge:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) + + # Extract evaluation_metric_keys from the variation + variation = self._client.variation(key, context, default_value.to_dict()) + evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) + + config = AIJudgeConfig( + enabled=bool(enabled), + evaluation_metric_keys=evaluation_metric_keys, + model=model, + messages=messages, + provider=provider, + tracker=tracker, + ) + + return config + + async def create_judge( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIJudge]: + """ + Creates and returns a new Judge instance for AI evaluation. + + :param key: The key identifying the AI judge configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation. 
+ The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. + :param default_ai_provider: Optional default AI provider to use. + :return: Judge instance or None if disabled/unsupported + + Example:: + + judge = client.create_judge( + "relevance-judge", + context, + AIJudgeConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + evaluation_metric_keys=['$ld:ai:judge:relevance'], + messages=[LDMessage(role='system', content='You are a relevance judge.')] + ), + variables={'metric': "relevance"} + ) + + if judge: + result = await judge.evaluate("User question", "AI response") + if result and result.evals: + relevance_eval = result.evals.get('$ld:ai:judge:relevance') + if relevance_eval: + print('Relevance score:', relevance_eval.score) + """ + self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) + + try: + # Warn if reserved variables are provided + if variables: + if 'message_history' in variables: + # Note: Python doesn't have a logger on the client, but we could add one + pass # Would log warning if logger available + if 'response_to_evaluate' in variables: + pass # Would log warning if logger available + + # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + extended_variables = dict(variables) if variables else {} + extended_variables['message_history'] = '{{message_history}}' + extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' + + judge_config = self.judge_config(key, context, default_value, extended_variables) + + if not judge_config.enabled or not judge_config.tracker: + # Would log info if logger available + return None + + # Create AI provider for the judge + provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) + if not provider: + return None + + return AIJudge(judge_config, judge_config.tracker, provider, self._logger) + except Exception as error: + 
# Would log error if logger available + return None + + async def _initialize_judges( + self, + judge_configs: List[JudgeConfiguration.Judge], + context: Context, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Dict[str, AIJudge]: + """ + Initialize judges from judge configurations. + + :param judge_configs: List of judge configurations + :param context: Standard Context used when evaluating flags + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: Dictionary of judge instances keyed by their configuration keys + """ + judges: Dict[str, AIJudge] = {} + + async def create_judge_for_config(judge_key: str): + judge = await self.create_judge( + judge_key, + context, + AIJudgeConfigDefault(enabled=False), + variables, + default_ai_provider, + ) + return judge_key, judge + + judge_promises = [ + create_judge_for_config(judge_config.key) + for judge_config in judge_configs + ] + + import asyncio + results = await asyncio.gather(*judge_promises, return_exceptions=True) + + for result in results: + if isinstance(result, Exception): + continue + judge_key, judge = result + if judge: + judges[judge_key] = judge + + return judges + + async def create_chat( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[TrackedChat]: + """ + Creates and returns a new TrackedChat instance for AI chat conversations. 
+ + :param key: The key identifying the AI completion configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: TrackedChat instance or None if disabled/unsupported + + Example:: + + chat = await client.create_chat( + "customer-support-chat", + context, + AICompletionConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + messages=[LDMessage(role='system', content='You are a helpful assistant.')] + ), + variables={'customerName': 'John'} + ) + + if chat: + response = await chat.invoke("I need help with my order") + print(response.message.content) + + # Access conversation history + messages = chat.get_messages() + print(f"Conversation has {len(messages)} messages") + """ + self._client.track('$ld:ai:config:function:createChat', context, key, 1) + if self._logger: + self._logger.debug(f"Creating chat for key: {key}") + config = self.completion_config(key, context, default_value, variables) + + if not config.enabled or not config.tracker: + # Would log info if logger available + return None + + provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) + if not provider: + return None + + judges = {} + if config.judge_configuration and config.judge_configuration.judges: + judges = await self._initialize_judges( + config.judge_configuration.judges, + context, + variables, + default_ai_provider, + ) + + return TrackedChat(config, config.tracker, provider, judges, self._logger) + + def agent_config( + self, + key: str, + context: Context, + default_value: AIAgentConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIAgentConfig: + """ + Retrieve a single AI Config agent. 
+ + This method retrieves a single agent configuration with instructions + dynamically interpolated using the provided variables and context data. + + Example:: + + agent = client.agent_config( + 'research_agent', + context, + AIAgentConfigDefault( + enabled=True, + model=ModelConfig('gpt-4'), + instructions="You are a research assistant specializing in {{topic}}." + ), + variables={'topic': 'climate change'} + ) + + if agent.enabled: + research_result = agent.instructions # Interpolated instructions + agent.tracker.track_success() + + :param key: The agent configuration key. + :param context: The context to evaluate the agent configuration in. + :param default_value: Default agent values. + :param variables: Variables for interpolation. + :return: Configured AIAgentConfig instance. + """ + # Track single agent usage + self._client.track( + "$ld:ai:agent:function:single", + context, + key, + 1 + ) + + return self.__evaluate_agent(key, context, default_value, variables) + + def agent( + self, + config: AIAgentConfigRequest, + context: Context, + ) -> AIAgentConfig: + """ + Retrieve a single AI Config agent. + + .. deprecated:: Use :meth:`agent_config` instead. This method will be removed in a future version. + + :param config: The agent configuration to use. + :param context: The context to evaluate the agent configuration in. + :return: Configured AIAgentConfig instance. + """ + return self.agent_config(config.key, context, config.default_value, config.variables) + + def agent_configs( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: + """ + Retrieve multiple AI agent configurations. + + This method allows you to retrieve multiple agent configurations in a single call, + with each agent having its own default configuration and variables for instruction + interpolation. 
+ + Example:: + + agents = client.agent_configs([ + AIAgentConfigRequest( + key='research_agent', + default_value=AIAgentConfigDefault( + enabled=True, + instructions='You are a research assistant.' + ), + variables={'topic': 'climate change'} + ), + AIAgentConfigRequest( + key='writing_agent', + default_value=AIAgentConfigDefault( + enabled=True, + instructions='You are a writing assistant.' + ), + variables={'style': 'academic'} + ) + ], context) + + research_result = agents["research_agent"].instructions + agents["research_agent"].tracker.track_success() + + :param agent_configs: List of agent configurations to retrieve. + :param context: The context to evaluate the agent configurations in. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. + """ + # Track multiple agents usage + agent_count = len(agent_configs) + self._client.track( + "$ld:ai:agent:function:multiple", + context, + agent_count, + agent_count + ) + + result: AIAgents = {} + + for config in agent_configs: + agent = self.__evaluate_agent( + config.key, + context, + config.default_value, + config.variables + ) + result[config.key] = agent + + return result + + def agents( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: + """ + Retrieve multiple AI agent configurations. + + .. deprecated:: Use :meth:`agent_configs` instead. This method will be removed in a future version. + + :param agent_configs: List of agent configurations to retrieve. + :param context: The context to evaluate the agent configurations in. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. 
+ """ + return self.agent_configs(agent_configs, context) + + def __evaluate( + self, + key: str, + context: Context, + default_dict: Dict[str, Any], + variables: Optional[Dict[str, Any]] = None, + ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool, Optional[Any]]: + """ + Internal method to evaluate a configuration and extract components. + + :param key: The configuration key. + :param context: The evaluation context. + :param default_dict: Default configuration as dictionary. + :param variables: Variables for interpolation. + :return: Tuple of (model, provider, messages, instructions, tracker, enabled). + """ + variation = self._client.variation(key, context, default_dict) + + all_variables = {} + if variables: + all_variables.update(variables) + all_variables['ldctx'] = context.to_dict() + + # Extract messages + messages = None + if 'messages' in variation and isinstance(variation['messages'], list) and all( + isinstance(entry, dict) for entry in variation['messages'] + ): + messages = [ + LDMessage( + role=entry['role'], + content=self.__interpolate_template( + entry['content'], all_variables + ), + ) + for entry in variation['messages'] + ] + + # Extract instructions + instructions = None + if 'instructions' in variation and isinstance(variation['instructions'], str): + instructions = self.__interpolate_template(variation['instructions'], all_variables) + + # Extract provider config + provider_config = None + if 'provider' in variation and isinstance(variation['provider'], dict): + provider = variation['provider'] + provider_config = ProviderConfig(provider.get('name', '')) + + # Extract model config + model = None + if 'model' in variation and isinstance(variation['model'], dict): + parameters = variation['model'].get('parameters', None) + custom = variation['model'].get('custom', None) + model = ModelConfig( + name=variation['model']['name'], + parameters=parameters, + custom=custom + 
) + + # Create tracker + tracker = LDAIConfigTracker( + self._client, + variation.get('_ldMeta', {}).get('variationKey', ''), + key, + int(variation.get('_ldMeta', {}).get('version', 1)), + model.name if model else '', + provider_config.name if provider_config else '', + context, + ) + + enabled = variation.get('_ldMeta', {}).get('enabled', False) + + # Extract judge configuration + judge_configuration = None + if 'judgeConfiguration' in variation and isinstance(variation['judgeConfiguration'], dict): + judge_config = variation['judgeConfiguration'] + if 'judges' in judge_config and isinstance(judge_config['judges'], list): + judges = [ + JudgeConfiguration.Judge( + key=judge['key'], + sampling_rate=judge['samplingRate'] + ) + for judge in judge_config['judges'] + if isinstance(judge, dict) and 'key' in judge and 'samplingRate' in judge + ] + if judges: + judge_configuration = JudgeConfiguration(judges=judges) + + return model, provider_config, messages, instructions, tracker, enabled, judge_configuration + + def __evaluate_agent( + self, + key: str, + context: Context, + default_value: AIAgentConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIAgentConfig: + """ + Internal method to evaluate an agent configuration. + + :param key: The agent configuration key. + :param context: The evaluation context. + :param default_value: Default agent values. + :param variables: Variables for interpolation. + :return: Configured AIAgentConfig instance. 
+ """ + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) + + # For agents, prioritize instructions over messages + final_instructions = instructions if instructions is not None else default_value.instructions + + return AIAgentConfig( + enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), + model=model or default_value.model, + provider=provider or default_value.provider, + instructions=final_instructions, + tracker=tracker, + judge_configuration=judge_configuration or default_value.judge_configuration, + ) + + def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str: + """ + Interpolate the template with the given variables using Mustache format. + + :param template: The template string. + :param variables: The variables to interpolate into the template. + :return: The interpolated string. + """ + return chevron.render(template, variables) diff --git a/packages/core/ldai/judge/__init__.py b/packages/core/ldai/judge/__init__.py new file mode 100644 index 0000000..4ab4df4 --- /dev/null +++ b/packages/core/ldai/judge/__init__.py @@ -0,0 +1,7 @@ +"""Judge module for LaunchDarkly AI SDK.""" + +from ldai.judge.ai_judge import AIJudge + +__all__ = ['AIJudge'] + + diff --git a/packages/core/ldai/judge/ai_judge.py b/packages/core/ldai/judge/ai_judge.py new file mode 100644 index 0000000..20efbf8 --- /dev/null +++ b/packages/core/ldai/judge/ai_judge.py @@ -0,0 +1,230 @@ +"""Judge implementation for AI evaluation.""" + +import random +from typing import Any, Dict, Optional + +import chevron + +from ldai.models import AIJudgeConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.tracker import LDAIConfigTracker +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder + + +class AIJudge: 
+ """ + Judge implementation that handles evaluation functionality and conversation management. + + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate + other AI Configs using structured output. + """ + + def __init__( + self, + ai_config: AIJudgeConfig, + ai_config_tracker: LDAIConfigTracker, + ai_provider: AIProvider, + logger: Optional[Any] = None, + ): + """ + Initialize the Judge. + + :param ai_config: The judge AI configuration + :param ai_config_tracker: The tracker for the judge configuration + :param ai_provider: The AI provider to use for evaluation + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._ai_config_tracker = ai_config_tracker + self._ai_provider = ai_provider + self._logger = logger + self._evaluation_response_structure = EvaluationSchemaBuilder.build( + ai_config.evaluation_metric_keys + ) + + async def evaluate( + self, + input_text: str, + output_text: str, + sampling_rate: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response using the judge's configuration. 
+ + :param input_text: The input prompt or question that was provided to the AI + :param output_text: The AI-generated response to be evaluated + :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + try: + if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: + if self._logger: + self._logger.warn( + 'Judge configuration is missing required evaluationMetricKeys' + ) + return None + + if not self._ai_config.messages: + if self._logger: + self._logger.warn('Judge configuration must include messages') + return None + + if random.random() > sampling_rate: + if self._logger: + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + return None + + messages = self._construct_evaluation_messages(input_text, output_text) + + # Track metrics of the structured model invocation + response = await self._ai_config_tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) + ) + + success = response.metrics.success + + evals = self._parse_evaluation_response(response.data) + + if len(evals) != len(self._ai_config.evaluation_metric_keys): + if self._logger: + self._logger.warn('Judge evaluation did not return all evaluations') + success = False + + return JudgeResponse( + evals=evals, + success=success, + ) + except Exception as error: + if self._logger: + self._logger.error(f'Judge evaluation failed: {error}') + return JudgeResponse( + evals={}, + success=False, + error=str(error) if isinstance(error, Exception) else 'Unknown error', + ) + + async def evaluate_messages( + self, + messages: list[LDMessage], + response: ChatResponse, + sampling_ratio: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response from chat messages and response. 
+ + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' + output_text = response.message.content + + return await self.evaluate(input_text, output_text, sampling_ratio) + + def get_ai_config(self) -> AIJudgeConfig: + """ + Returns the AI Config used by this judge. + + :return: The judge AI configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Returns the tracker associated with this judge. + + :return: The tracker for the judge configuration + """ + return self._ai_config_tracker + + def get_provider(self) -> AIProvider: + """ + Returns the AI provider used by this judge. + + :return: The AI provider + """ + return self._ai_provider + + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + """ + Constructs evaluation messages by combining judge's config messages with input/output. + + :param input_text: The input text + :param output_text: The output text to evaluate + :return: List of messages for evaluation + """ + if not self._ai_config.messages: + return [] + + messages: list[LDMessage] = [] + for msg in self._ai_config.messages: + # Interpolate message content with reserved variables + content = self._interpolate_message(msg.content, { + 'message_history': input_text, + 'response_to_evaluate': output_text, + }) + messages.append(LDMessage(role=msg.role, content=content)) + + return messages + + def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: + """ + Interpolates message content with variables using Mustache templating. 
+ + :param content: The message content template + :param variables: Variables to interpolate + :return: Interpolated message content + """ + # Use chevron (Mustache) for templating, with no escaping + return chevron.render(content, variables) + + def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: + """ + Parses the structured evaluation response from the AI provider. + + :param data: The structured response data + :return: Dictionary of evaluation scores keyed by metric key + """ + results: Dict[str, EvalScore] = {} + + if not data.get('evaluations') or not isinstance(data['evaluations'], dict): + if self._logger: + self._logger.warn('Invalid response: missing or invalid evaluations object') + return results + + evaluations = data['evaluations'] + + for metric_key in self._ai_config.evaluation_metric_keys: + evaluation = evaluations.get(metric_key) + + if not evaluation or not isinstance(evaluation, dict): + if self._logger: + self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + continue + + score = evaluation.get('score') + reasoning = evaluation.get('reasoning') + + if not isinstance(score, (int, float)) or score < 0 or score > 1: + if self._logger: + self._logger.warn( + f'Invalid score evaluated for {metric_key}: {score}. ' + 'Score must be a number between 0 and 1 inclusive' + ) + continue + + if not isinstance(reasoning, str): + if self._logger: + self._logger.warn( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. 
' + 'Reasoning must be a string' + ) + continue + + results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) + + return results + diff --git a/packages/core/ldai/judge/evaluation_schema_builder.py b/packages/core/ldai/judge/evaluation_schema_builder.py new file mode 100644 index 0000000..1965e64 --- /dev/null +++ b/packages/core/ldai/judge/evaluation_schema_builder.py @@ -0,0 +1,75 @@ +"""Internal class for building dynamic evaluation response schemas.""" + +from typing import Any, Dict + + +class EvaluationSchemaBuilder: + """ + Internal class for building dynamic evaluation response schemas. + Not exported - only used internally by Judge. + """ + + @staticmethod + def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build an evaluation response schema from evaluation metric keys. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Schema dictionary for structured output + """ + return { + 'title': 'EvaluationResponse', + 'description': f"Response containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'type': 'object', + 'properties': { + 'evaluations': { + 'type': 'object', + 'description': f"Object containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'properties': EvaluationSchemaBuilder._build_key_properties(evaluation_metric_keys), + 'required': evaluation_metric_keys, + 'additionalProperties': False, + }, + }, + 'required': ['evaluations'], + 'additionalProperties': False, + } + + @staticmethod + def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build properties for each evaluation metric key. 
+ + :param evaluation_metric_keys: List of evaluation metric keys + :return: Dictionary of properties for each key + """ + result: Dict[str, Any] = {} + for key in evaluation_metric_keys: + result[key] = EvaluationSchemaBuilder._build_key_schema(key) + return result + + @staticmethod + def _build_key_schema(key: str) -> Dict[str, Any]: + """ + Build schema for a single evaluation metric key. + + :param key: Evaluation metric key + :return: Schema dictionary for the key + """ + return { + 'type': 'object', + 'properties': { + 'score': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': f'Score between 0.0 and 1.0 for {key}', + }, + 'reasoning': { + 'type': 'string', + 'description': f'Reasoning behind the score for {key}', + }, + }, + 'required': ['score', 'reasoning'], + 'additionalProperties': False, + } + diff --git a/packages/core/ldai/models.py b/packages/core/ldai/models.py new file mode 100644 index 0000000..fa36f8c --- /dev/null +++ b/packages/core/ldai/models.py @@ -0,0 +1,363 @@ +import warnings +from dataclasses import dataclass, field +from typing import Any, Dict, List, Literal, Optional, Union + +from ldai.tracker import LDAIConfigTracker + + +@dataclass +class LDMessage: + role: Literal['system', 'user', 'assistant'] + content: str + + def to_dict(self) -> dict: + """ + Render the given message as a dictionary object. + """ + return { + 'role': self.role, + 'content': self.content, + } + + +class ModelConfig: + """ + Configuration related to the model. + """ + + def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): + """ + :param name: The name of the model. + :param parameters: Additional model-specific parameters. + :param custom: Additional customer provided data. + """ + self._name = name + self._parameters = parameters + self._custom = custom + + @property + def name(self) -> str: + """ + The name of the model. 
+ """ + return self._name + + def get_parameter(self, key: str) -> Any: + """ + Retrieve model-specific parameters. + + Accessing a named, typed attribute (e.g. name) will result in the call + being delegated to the appropriate property. + """ + if key == 'name': + return self.name + + if self._parameters is None: + return None + + return self._parameters.get(key) + + def get_custom(self, key: str) -> Any: + """ + Retrieve customer provided data. + """ + if self._custom is None: + return None + + return self._custom.get(key) + + def to_dict(self) -> dict: + """ + Render the given model config as a dictionary object. + """ + return { + 'name': self._name, + 'parameters': self._parameters, + 'custom': self._custom, + } + + +class ProviderConfig: + """ + Configuration related to the provider. + """ + + def __init__(self, name: str): + self._name = name + + @property + def name(self) -> str: + """ + The name of the provider. + """ + return self._name + + def to_dict(self) -> dict: + """ + Render the given provider config as a dictionary object. + """ + return { + 'name': self._name, + } + + +# ============================================================================ +# Judge Types +# ============================================================================ + +@dataclass(frozen=True) +class JudgeConfiguration: + """ + Configuration for judge attachment to AI Configs. + """ + + @dataclass(frozen=True) + class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + judges: List['JudgeConfiguration.Judge'] + + def to_dict(self) -> dict: + """ + Render the judge configuration as a dictionary object. 
+ """ + return { + 'judges': [judge.to_dict() for judge in self.judges], + } + + +# ============================================================================ +# Base AI Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIConfigDefault: + """ + Base AI Config interface for default implementations with optional enabled property. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + + def _base_to_dict(self) -> Dict[str, Any]: + """ + Render the base config fields as a dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +@dataclass(frozen=True) +class AIConfig: + """ + Base AI Config interface without mode-specific fields. + """ + enabled: bool + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + + def _base_to_dict(self) -> Dict[str, Any]: + """ + Render the base config fields as a dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AICompletionConfigDefault(AIConfigDefault): + """ + Default Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. 
+ """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AICompletionConfig(AIConfig): + """ + Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +# ============================================================================ +# Agent Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIAgentConfigDefault(AIConfigDefault): + """ + Default Agent-specific AI Config with instructions. + """ + instructions: Optional[str] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent config default as a dictionary object. + """ + result = self._base_to_dict() + if self.instructions is not None: + result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AIAgentConfig(AIConfig): + """ + Agent-specific AI Config with instructions. + """ + instructions: Optional[str] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent config as a dictionary object. 
+ """ + result = self._base_to_dict() + if self.instructions is not None: + result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +# ============================================================================ +# Judge Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIJudgeConfigDefault(AIConfigDefault): + """ + Default Judge-specific AI Config with required evaluation metric key. + """ + messages: Optional[List[LDMessage]] = None + evaluation_metric_keys: Optional[List[str]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config default as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.evaluation_metric_keys is not None: + result['evaluationMetricKeys'] = self.evaluation_metric_keys + return result + + +@dataclass(frozen=True) +class AIJudgeConfig(AIConfig): + """ + Judge-specific AI Config with required evaluation metric key. + """ + evaluation_metric_keys: List[str] = field(default_factory=list) + messages: Optional[List[LDMessage]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config as a dictionary object. + """ + result = self._base_to_dict() + result['evaluationMetricKeys'] = self.evaluation_metric_keys + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + return result + + +# ============================================================================ +# Agent Request Config +# ============================================================================ + +@dataclass +class AIAgentConfigRequest: + """ + Configuration for a single agent request. + + Combines agent key with its specific default configuration and variables. 
+ """ + key: str + default_value: AIAgentConfigDefault + variables: Optional[Dict[str, Any]] = None + + +# Type alias for multiple agents +AIAgents = Dict[str, AIAgentConfig] + +# Type alias for all AI Config variants +AIConfigKind = Union[AIAgentConfig, AICompletionConfig, AIJudgeConfig] + + +# ============================================================================ +# Deprecated Type Aliases for Backward Compatibility +# ============================================================================ + +# Note: These are type aliases that point to the new types. +# Since Python uses duck typing, these will work at runtime even if type checkers complain. +# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault +# The old AIConfig return type had required enabled, so it maps to AICompletionConfig + +# Deprecated: Use AICompletionConfigDefault instead +# This was the old AIConfig with optional enabled (used as input/default) +# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled +AIConfig = AICompletionConfigDefault + +# Deprecated: Use AIAgentConfigDefault instead +LDAIAgentDefaults = AIAgentConfigDefault + +# Deprecated: Use AIAgentConfigRequest instead +LDAIAgentConfig = AIAgentConfigRequest + +# Deprecated: Use AIAgentConfig instead (note: this was the old return type) +LDAIAgent = AIAgentConfig + diff --git a/packages/core/ldai/providers/ai_provider.py b/packages/core/ldai/providers/ai_provider.py new file mode 100644 index 0000000..daf56c6 --- /dev/null +++ b/packages/core/ldai/providers/ai_provider.py @@ -0,0 +1,91 @@ +"""Abstract base class for AI providers.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse + + +class AIProvider(ABC): + """ + Abstract base class for AI providers that implement chat model functionality. 
+ + This class provides the contract that all provider implementations must follow + to integrate with LaunchDarkly's tracking and configuration capabilities. + + Following the AICHAT spec recommendation to use base classes with non-abstract methods + for better extensibility and backwards compatibility. + """ + + def __init__(self, logger: Optional[Any] = None): + """ + Initialize the AI provider. + + :param logger: Optional logger for logging provider operations. + """ + self.logger = logger + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the chat model with an array of messages. + + This method should convert messages to provider format, invoke the model, + and return a ChatResponse with the result and metrics. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + if self.logger: + self.logger.warn('invokeModel not implemented by this provider') + + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the chat model with structured output support. + + This method should convert messages to provider format, invoke the model with + structured output configuration, and return a structured response. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. 
+ + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + if self.logger: + self.logger.warn('invokeStructuredModel not implemented by this provider') + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics(success=False, usage=None), + ) + + @staticmethod + @abstractmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': + """ + Static method that constructs an instance of the provider. + + Each provider implementation must provide their own static create method + that accepts an AIConfigKind and returns a configured instance. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured provider instance + """ + raise NotImplementedError('Provider implementations must override the static create method') + diff --git a/packages/core/ldai/providers/ai_provider_factory.py b/packages/core/ldai/providers/ai_provider_factory.py new file mode 100644 index 0000000..bd17485 --- /dev/null +++ b/packages/core/ldai/providers/ai_provider_factory.py @@ -0,0 +1,169 @@ +"""Factory for creating AIProvider instances based on the provider configuration.""" + +import importlib +from typing import Any, List, Literal, Optional, Type + +from ldai.models import AIConfigKind +from ldai.providers.ai_provider import AIProvider + + +# List of supported AI providers +SUPPORTED_AI_PROVIDERS = [ + # Multi-provider packages should be last in the list + 'langchain', +] + +# Type representing the supported AI providers +SupportedAIProvider = Literal['langchain'] + + +class AIProviderFactory: + """ + Factory for creating AIProvider instances based on the provider configuration. 
+ """ + + @staticmethod + async def create( + ai_config: AIConfigKind, + logger: Optional[Any] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIProvider]: + """ + Create an AIProvider instance based on the AI configuration. + + This method attempts to load provider-specific implementations dynamically. + Returns None if the provider is not supported. + + :param ai_config: The AI configuration + :param logger: Optional logger for logging provider initialization + :param default_ai_provider: Optional default AI provider to use + :return: AIProvider instance or None if not supported + """ + provider_name = ai_config.provider.name.lower() if ai_config.provider else None + # Determine which providers to try based on default_ai_provider + providers_to_try = AIProviderFactory._get_providers_to_try(default_ai_provider, provider_name) + + # Try each provider in order + for provider_type in providers_to_try: + provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) + if provider: + return provider + + # If no provider was successfully created, log a warning + if logger: + logger.warn( + f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" + ) + return None + + @staticmethod + def _get_providers_to_try( + default_ai_provider: Optional[SupportedAIProvider], + provider_name: Optional[str], + ) -> List[SupportedAIProvider]: + """ + Determine which providers to try based on default_ai_provider and provider_name. 
+ + :param default_ai_provider: Optional default provider to use + :param provider_name: Optional provider name from config + :return: List of providers to try in order + """ + # If default_ai_provider is set, only try that specific provider + if default_ai_provider: + return [default_ai_provider] + + # If no default_ai_provider is set, try all providers in order + provider_set = set() + + # First try the specific provider if it's supported + if provider_name and provider_name in SUPPORTED_AI_PROVIDERS: + provider_set.add(provider_name) # type: ignore + + # Then try multi-provider packages, but avoid duplicates + multi_provider_packages: List[SupportedAIProvider] = ['langchain'] + for provider in multi_provider_packages: + provider_set.add(provider) + + return list(provider_set) + + @staticmethod + async def _try_create_provider( + provider_type: SupportedAIProvider, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Try to create a provider of the specified type. + + :param provider_type: Type of provider to create + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + # Handle built-in providers (part of this package) + if provider_type == 'langchain': + try: + from ldai.providers.langchain import LangChainProvider + return await LangChainProvider.create(ai_config, logger) + except ImportError as error: + if logger: + logger.warn( + f"Error creating LangChainProvider: {error}. " + f"Make sure langchain and langchain-core packages are installed." 
+ ) + return None + + # TODO: REL-10773 OpenAI provider + # TODO: REL-10776 Vercel provider + # For future external providers, use dynamic import + provider_mappings = { + # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + } + + if provider_type not in provider_mappings: + return None + + package_name, provider_class_name = provider_mappings[provider_type] + return await AIProviderFactory._create_provider( + package_name, provider_class_name, ai_config, logger + ) + + @staticmethod + async def _create_provider( + package_name: str, + provider_class_name: str, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Create a provider instance dynamically. + + :param package_name: Name of the package containing the provider + :param provider_class_name: Name of the provider class + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + try: + # Try to dynamically import the provider + # This will work if the package is installed + module = importlib.import_module(package_name) + provider_class: Type[AIProvider] = getattr(module, provider_class_name) + + provider = await provider_class.create(ai_config, logger) + if logger: + logger.debug( + f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}" + ) + return provider + except (ImportError, AttributeError, Exception) as error: + # If the provider is not available or creation fails, return None + if logger: + logger.warn( + f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}: {error}" + ) + return None + diff --git a/packages/core/ldai/providers/types.py b/packages/core/ldai/providers/types.py new file mode 100644 index 0000000..45df755 --- /dev/null +++ 
b/packages/core/ldai/providers/types.py @@ -0,0 +1,92 @@ +"""Types for AI provider responses.""" + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from ldai.models import LDMessage +from ldai.tracker import TokenUsage + + +@dataclass +class LDAIMetrics: + """ + Metrics information for AI operations that includes success status and token usage. + """ + success: bool + usage: Optional[TokenUsage] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the metrics as a dictionary object. + """ + result: Dict[str, Any] = { + 'success': self.success, + } + if self.usage is not None: + result['usage'] = { + 'total': self.usage.total, + 'input': self.usage.input, + 'output': self.usage.output, + } + return result + + +@dataclass +class ChatResponse: + """ + Chat response structure. + """ + message: LDMessage + metrics: LDAIMetrics + evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later + + +@dataclass +class StructuredResponse: + """ + Structured response from AI models. + """ + data: Dict[str, Any] + raw_response: str + metrics: LDAIMetrics + + +@dataclass +class EvalScore: + """ + Score and reasoning for a single evaluation metric. + """ + score: float # Score between 0.0 and 1.0 + reasoning: str # Reasoning behind the provided score + + def to_dict(self) -> Dict[str, Any]: + """ + Render the evaluation score as a dictionary object. + """ + return { + 'score': self.score, + 'reasoning': self.reasoning, + } + + +@dataclass +class JudgeResponse: + """ + Response from a judge evaluation containing scores and reasoning for multiple metrics. + """ + evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning + success: bool # Whether the evaluation completed successfully + error: Optional[str] = None # Error message if evaluation failed + + def to_dict(self) -> Dict[str, Any]: + """ + Render the judge response as a dictionary object. 
+ """ + result: Dict[str, Any] = { + 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, + 'success': self.success, + } + if self.error is not None: + result['error'] = self.error + return result + diff --git a/packages/core/ldai/tracker.py b/packages/core/ldai/tracker.py new file mode 100644 index 0000000..632f0f4 --- /dev/null +++ b/packages/core/ldai/tracker.py @@ -0,0 +1,404 @@ +import time +from dataclasses import dataclass +from enum import Enum +from typing import Any, Dict, Optional + +from ldclient import Context, LDClient + + +class FeedbackKind(Enum): + """ + Types of feedback that can be provided for AI operations. + """ + + Positive = "positive" + Negative = "negative" + + +@dataclass +class TokenUsage: + """ + Tracks token usage for AI operations. + + :param total: Total number of tokens used. + :param input: Number of tokens in the prompt. + :param output: Number of tokens in the completion. + """ + + total: int + input: int + output: int + + +class LDAIMetricSummary: + """ + Summary of metrics which have been tracked. + """ + + def __init__(self): + self._duration = None + self._success = None + self._feedback = None + self._usage = None + self._time_to_first_token = None + + @property + def duration(self) -> Optional[int]: + return self._duration + + @property + def success(self) -> Optional[bool]: + return self._success + + @property + def feedback(self) -> Optional[Dict[str, FeedbackKind]]: + return self._feedback + + @property + def usage(self) -> Optional[TokenUsage]: + return self._usage + + @property + def time_to_first_token(self) -> Optional[int]: + return self._time_to_first_token + + +class LDAIConfigTracker: + """ + Tracks configuration and usage metrics for LaunchDarkly AI operations. + """ + + def __init__( + self, + ld_client: LDClient, + variation_key: str, + config_key: str, + version: int, + model_name: str, + provider_name: str, + context: Context, + ): + """ + Initialize an AI Config tracker. 
+ + :param ld_client: LaunchDarkly client instance. + :param variation_key: Variation key for tracking. + :param config_key: Configuration key for tracking. + :param version: Version of the variation. + :param model_name: Name of the model used. + :param provider_name: Name of the provider used. + :param context: Context for evaluation. + """ + self._ld_client = ld_client + self._variation_key = variation_key + self._config_key = config_key + self._version = version + self._model_name = model_name + self._provider_name = provider_name + self._context = context + self._summary = LDAIMetricSummary() + + def __get_track_data(self): + """ + Get tracking data for events. + + :return: Dictionary containing variation and config keys. + """ + return { + "variationKey": self._variation_key, + "configKey": self._config_key, + "version": self._version, + "modelName": self._model_name, + "providerName": self._provider_name, + } + + def track_duration(self, duration: int) -> None: + """ + Manually track the duration of an AI operation. + + :param duration: Duration in milliseconds. + """ + self._summary._duration = duration + self._ld_client.track( + "$ld:ai:duration:total", self._context, self.__get_track_data(), duration + ) + + def track_time_to_first_token(self, time_to_first_token: int) -> None: + """ + Manually track the time to first token of an AI operation. + + :param time_to_first_token: Time to first token in milliseconds. + """ + self._summary._time_to_first_token = time_to_first_token + self._ld_client.track( + "$ld:ai:tokens:ttf", + self._context, + self.__get_track_data(), + time_to_first_token, + ) + + def track_duration_of(self, func): + """ + Automatically track the duration of an AI operation. + + An exception occurring during the execution of the function will still + track the duration. The exception will be re-thrown. + + :param func: Function to track (synchronous only). + :return: Result of the tracked function. 
+ """ + start_time = time.time() + try: + result = func() + finally: + end_time = time.time() + duration = int((end_time - start_time) * 1000) # duration in milliseconds + self.track_duration(duration) + + return result + + async def track_metrics_of(self, metrics_extractor, func): + """ + Track metrics for a generic AI operation. + + This function will track the duration of the operation, extract metrics using the provided + metrics extractor function, and track success or error status accordingly. + + If the provided function throws, then this method will also throw. + In the case the provided function throws, this function will record the duration and an error. + A failed operation will not have any token usage data. + + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result + :param func: Async function which executes the operation + :return: The result of the operation + """ + start_time = time.time() + result = None + try: + result = await func() + except Exception as err: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + self.track_error() + raise err + + # Track duration after successful call + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + + # Extract metrics after successful AI call + from ldai.providers.types import LDAIMetrics + metrics = metrics_extractor(result) + + # Track success/error based on metrics + if metrics.success: + self.track_success() + else: + self.track_error() + + # Track token usage if available + if metrics.usage: + self.track_tokens(metrics.usage) + + return result + + def track_eval_scores(self, scores: Dict[str, Any]) -> None: + """ + Track evaluation scores for multiple metrics. 
+ + :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) + """ + from ldai.providers.types import EvalScore + + # Track each evaluation score individually + for metric_key, eval_score in scores.items(): + if isinstance(eval_score, EvalScore): + self._ld_client.track( + metric_key, + self._context, + self.__get_track_data(), + eval_score.score + ) + + def track_judge_response(self, judge_response: Any) -> None: + """ + Track a judge response, including evaluation scores and success status. + + :param judge_response: JudgeResponse object containing evals and success status + """ + from ldai.providers.types import JudgeResponse + + if isinstance(judge_response, JudgeResponse): + # Track evaluation scores + if judge_response.evals: + self.track_eval_scores(judge_response.evals) + + # Track success/error based on judge response + if judge_response.success: + self.track_success() + else: + self.track_error() + + def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: + """ + Track user feedback for an AI operation. + + :param feedback: Dictionary containing feedback kind. + """ + self._summary._feedback = feedback + if feedback["kind"] == FeedbackKind.Positive: + self._ld_client.track( + "$ld:ai:feedback:user:positive", + self._context, + self.__get_track_data(), + 1, + ) + elif feedback["kind"] == FeedbackKind.Negative: + self._ld_client.track( + "$ld:ai:feedback:user:negative", + self._context, + self.__get_track_data(), + 1, + ) + + def track_success(self) -> None: + """ + Track a successful AI generation. + """ + self._summary._success = True + self._ld_client.track( + "$ld:ai:generation:success", self._context, self.__get_track_data(), 1 + ) + + def track_error(self) -> None: + """ + Track an unsuccessful AI generation attempt. 
+ """ + self._summary._success = False + self._ld_client.track( + "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 + ) + + async def track_openai_metrics(self, func): + """ + Track OpenAI-specific operations. + + This function will track the duration of the operation, the token + usage, and the success or error status. + + If the provided function throws, then this method will also throw. + + In the case the provided function throws, this function will record the + duration and an error. + + A failed operation will not have any token usage data. + + :param func: Async function to track. + :return: Result of the tracked function. + """ + start_time = time.time() + try: + result = await func() + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + self.track_success() + if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): + self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) + except Exception: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + self.track_error() + raise + + return result + + def track_bedrock_converse_metrics(self, res: dict) -> dict: + """ + Track AWS Bedrock conversation operations. + + + This function will track the duration of the operation, the token + usage, and the success or error status. + + :param res: Response dictionary from Bedrock. + :return: The original response dictionary. + """ + status_code = res.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) + if status_code == 200: + self.track_success() + elif status_code >= 400: + self.track_error() + if res.get("metrics", {}).get("latencyMs"): + self.track_duration(res["metrics"]["latencyMs"]) + if res.get("usage"): + self.track_tokens(_bedrock_to_token_usage(res["usage"])) + return res + + def track_tokens(self, tokens: TokenUsage) -> None: + """ + Track token usage metrics. 
+ + :param tokens: Token usage data from either custom, OpenAI, or Bedrock sources. + """ + self._summary._usage = tokens + if tokens.total > 0: + self._ld_client.track( + "$ld:ai:tokens:total", + self._context, + self.__get_track_data(), + tokens.total, + ) + if tokens.input > 0: + self._ld_client.track( + "$ld:ai:tokens:input", + self._context, + self.__get_track_data(), + tokens.input, + ) + if tokens.output > 0: + self._ld_client.track( + "$ld:ai:tokens:output", + self._context, + self.__get_track_data(), + tokens.output, + ) + + def get_summary(self) -> LDAIMetricSummary: + """ + Get the current summary of AI metrics. + + :return: Summary of AI metrics. + """ + return self._summary + + +def _bedrock_to_token_usage(data: dict) -> TokenUsage: + """ + Convert a Bedrock usage dictionary to a TokenUsage object. + + :param data: Dictionary containing Bedrock usage data. + :return: TokenUsage object containing usage data. + """ + return TokenUsage( + total=data.get("totalTokens", 0), + input=data.get("inputTokens", 0), + output=data.get("outputTokens", 0), + ) + + +def _openai_to_token_usage(data: dict) -> TokenUsage: + """ + Convert an OpenAI usage dictionary to a TokenUsage object. + + :param data: Dictionary containing OpenAI usage data. + :return: TokenUsage object containing usage data. 
+ """ + return TokenUsage( + total=data.get("total_tokens", 0), + input=data.get("prompt_tokens", 0), + output=data.get("completion_tokens", 0), + ) diff --git a/packages/core/pyproject.toml b/packages/core/pyproject.toml new file mode 100644 index 0000000..4cc0756 --- /dev/null +++ b/packages/core/pyproject.toml @@ -0,0 +1,69 @@ +[tool.poetry] +name = "launchdarkly-server-sdk-ai" +version = "0.10.1" +description = "LaunchDarkly SDK for AI" +authors = ["LaunchDarkly "] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://docs.launchdarkly.com/sdk/ai/python" +repository = "https://github.com/launchdarkly/python-server-sdk-ai" +documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", +] +packages = [ { include = "ldai" } ] + +[tool.poetry.dependencies] +python = ">=3.9,<4" +launchdarkly-server-sdk = ">=9.4.0" +chevron = "=0.14.0" + + +[tool.poetry.group.dev.dependencies] +pytest = ">=2.8" +pytest-cov = ">=2.4.0" +pytest-mypy = "==1.0.1" +pytest-asyncio = ">=0.21.0" +mypy = "==1.18.2" +pycodestyle = "^2.12.1" +isort = ">=5.13.2,<7.0.0" + + +[tool.poetry.group.docs] +optional = true + +[tool.poetry.group.docs.dependencies] +sphinx = ">=6,<8" +sphinx-rtd-theme = ">=1.3,<4.0" +certifi = ">=2018.4.16" +expiringdict = ">=1.1.4" +pyrfc3339 = ">=1.0" +jsonpickle = ">1.4.1" +semver = ">=2.7.9" +urllib3 = ">=1.26.0" +jinja2 = "3.1.6" + +[tool.mypy] +python_version = "3.9" +ignore_missing_imports = true +install_types = true +non_interactive = true + + 
+[tool.pytest.ini_options] +addopts = ["-ra"] + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/packages/core/tests/__init__.py b/packages/core/tests/__init__.py new file mode 100644 index 0000000..1f7baa7 --- /dev/null +++ b/packages/core/tests/__init__.py @@ -0,0 +1,2 @@ +"""Tests for LaunchDarkly Server SDK for AI - Core package.""" + diff --git a/packages/core/tests/test_agents.py b/packages/core/tests/test_agents.py new file mode 100644 index 0000000..755f2e5 --- /dev/null +++ b/packages/core/tests/test_agents.py @@ -0,0 +1,342 @@ +import pytest +from ldclient import Config, Context, LDClient +from ldclient.integrations.test_data import TestData + +from ldai import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, ModelConfig, + ProviderConfig) + + +@pytest.fixture +def td() -> TestData: + td = TestData.data_source() + + # Single agent with instructions + td.update( + td.flag('customer-support-agent') + .variations( + { + 'model': {'name': 'gpt-4', 'parameters': {'temperature': 0.3, 'maxTokens': 2048}}, + 'provider': {'name': 'openai'}, + 'instructions': 'You are a helpful customer support agent for {{company_name}}. Always be polite and professional.', + '_ldMeta': {'enabled': True, 'variationKey': 'agent-v1', 'version': 1, 'mode': 'agent'}, + } + ) + .variation_for_all(0) + ) + + # Agent with context interpolation + td.update( + td.flag('personalized-agent') + .variations( + { + 'model': {'name': 'claude-3', 'parameters': {'temperature': 0.5}}, + 'instructions': 'Hello {{ldctx.name}}! I am your personal assistant. Your user key is {{ldctx.key}}.', + '_ldMeta': {'enabled': True, 'variationKey': 'personal-v1', 'version': 2, 'mode': 'agent'}, + } + ) + .variation_for_all(0) + ) + + # Agent with multi-context interpolation + td.update( + td.flag('multi-context-agent') + .variations( + { + 'model': {'name': 'gpt-3.5-turbo'}, + 'instructions': 'Welcome {{ldctx.user.name}} from {{ldctx.org.name}}! 
Your organization tier is {{ldctx.org.tier}}.', + '_ldMeta': {'enabled': True, 'variationKey': 'multi-v1', 'version': 1, 'mode': 'agent'}, + } + ) + .variation_for_all(0) + ) + + # Disabled agent + td.update( + td.flag('disabled-agent') + .variations( + { + 'model': {'name': 'gpt-4'}, + 'instructions': 'This agent is disabled.', + '_ldMeta': {'enabled': False, 'variationKey': 'disabled-v1', 'version': 1, 'mode': 'agent'}, + } + ) + .variation_for_all(0) + ) + + # Agent with minimal metadata + td.update( + td.flag('minimal-agent') + .variations( + { + 'instructions': 'Minimal agent configuration.', + '_ldMeta': {'enabled': True}, + } + ) + .variation_for_all(0) + ) + + # Sales assistant agent + td.update( + td.flag('sales-assistant') + .variations( + { + 'model': {'name': 'gpt-4', 'parameters': {'temperature': 0.7}}, + 'provider': {'name': 'openai'}, + 'instructions': 'You are a sales assistant for {{company_name}}. Help customers find the right products.', + '_ldMeta': {'enabled': True, 'variationKey': 'sales-v1', 'version': 1, 'mode': 'agent'}, + } + ) + .variation_for_all(0) + ) + + # Research agent for testing single agent method + td.update( + td.flag('research-agent') + .variations( + { + 'model': {'name': 'gpt-4', 'parameters': {'temperature': 0.2, 'maxTokens': 3000}}, + 'provider': {'name': 'openai'}, + 'instructions': 'You are a research assistant specializing in {{topic}}. 
Your expertise level should match {{ldctx.expertise}}.', + '_ldMeta': {'enabled': True, 'variationKey': 'research-v1', 'version': 1, 'mode': 'agent'}, + } + ) + .variation_for_all(0) + ) + + return td + + +@pytest.fixture +def client(td: TestData) -> LDClient: + config = Config('sdk-key', update_processor_class=td, send_events=False) + return LDClient(config=config) + + +@pytest.fixture +def ldai_client(client: LDClient) -> LDAIClient: + return LDAIClient(client) + + +def test_single_agent_method(ldai_client: LDAIClient): + """Test the single agent() method functionality.""" + context = Context.builder('user-key').set('expertise', 'advanced').build() + config = LDAIAgentConfig( + key='research-agent', + default_value=LDAIAgentDefaults( + enabled=False, + model=ModelConfig('fallback-model'), + instructions="Default instructions" + ), + variables={'topic': 'quantum computing'} + ) + + agent = ldai_client.agent(config, context) + + assert agent.enabled is True + assert agent.model is not None + assert agent.model.name == 'gpt-4' + assert agent.model.get_parameter('temperature') == 0.2 + assert agent.model.get_parameter('maxTokens') == 3000 + assert agent.provider is not None + assert agent.provider.name == 'openai' + assert agent.instructions == 'You are a research assistant specializing in quantum computing. Your expertise level should match advanced.' + assert agent.tracker is not None + + +def test_single_agent_with_defaults(ldai_client: LDAIClient): + """Test single agent method with non-existent flag using defaults.""" + context = Context.create('user-key') + config = LDAIAgentConfig( + key='non-existent-agent', + default_value=LDAIAgentDefaults( + enabled=True, + model=ModelConfig('default-model', parameters={'temp': 0.8}), + provider=ProviderConfig('default-provider'), + instructions="You are a default assistant for {{task}}." 
+ ), + variables={'task': 'general assistance'} + ) + + agent = ldai_client.agent(config, context) + + assert agent.enabled is True + assert agent.model is not None and agent.model.name == 'default-model' + assert agent.model is not None and agent.model.get_parameter('temp') == 0.8 + assert agent.provider is not None and agent.provider.name == 'default-provider' + assert agent.instructions == "You are a default assistant for general assistance." + assert agent.tracker is not None + + +def test_agents_method_with_configs(ldai_client: LDAIClient): + """Test the new agents() method with LDAIAgentConfig objects.""" + context = Context.create('user-key') + + agent_configs = [ + LDAIAgentConfig( + key='customer-support-agent', + default_value=LDAIAgentDefaults( + enabled=False, + model=ModelConfig('fallback-model'), + instructions="Default support" + ), + variables={'company_name': 'Acme Corp'} + ), + LDAIAgentConfig( + key='sales-assistant', + default_value=LDAIAgentDefaults( + enabled=False, + model=ModelConfig('fallback-model'), + instructions="Default sales" + ), + variables={'company_name': 'Acme Corp'} + ) + ] + + agents = ldai_client.agents(agent_configs, context) + + assert len(agents) == 2 + assert 'customer-support-agent' in agents + assert 'sales-assistant' in agents + + support_agent = agents['customer-support-agent'] + assert support_agent.enabled is True + assert support_agent.instructions is not None and 'Acme Corp' in support_agent.instructions + + sales_agent = agents['sales-assistant'] + assert sales_agent.enabled is True + assert sales_agent.instructions is not None and 'Acme Corp' in sales_agent.instructions + assert sales_agent.model is not None and sales_agent.model.get_parameter('temperature') == 0.7 + + +def test_agents_method_different_variables_per_agent(ldai_client: LDAIClient): + """Test agents method with different variables for each agent.""" + context = Context.builder('user-key').name('Alice').build() + + agent_configs = [ + 
LDAIAgentConfig( + key='personalized-agent', + default_value=LDAIAgentDefaults( + enabled=True, + instructions="Default personal" + ), + variables={} # Will use context only + ), + LDAIAgentConfig( + key='customer-support-agent', + default_value=LDAIAgentDefaults( + enabled=True, + instructions="Default support" + ), + variables={'company_name': 'TechStart Inc'} + ) + ] + + agents = ldai_client.agents(agent_configs, context) + + personal_agent = agents['personalized-agent'] + assert personal_agent.instructions == 'Hello Alice! I am your personal assistant. Your user key is user-key.' + + support_agent = agents['customer-support-agent'] + assert support_agent.instructions == 'You are a helpful customer support agent for TechStart Inc. Always be polite and professional.' + + +def test_agents_with_multi_context_interpolation(ldai_client: LDAIClient): + """Test agents method with multi-context interpolation.""" + user_context = Context.builder('user-key').name('Alice').build() + org_context = Context.builder('org-key').kind('org').name('LaunchDarkly').set('tier', 'Enterprise').build() + context = Context.multi_builder().add(user_context).add(org_context).build() + + agent_configs = [ + LDAIAgentConfig( + key='multi-context-agent', + default_value=LDAIAgentDefaults( + enabled=True, + instructions="Default multi-context" + ), + variables={} + ) + ] + + agents = ldai_client.agents(agent_configs, context) + + agent = agents['multi-context-agent'] + assert agent.instructions == 'Welcome Alice from LaunchDarkly! Your organization tier is Enterprise.' 
+ + +def test_disabled_agent_single_method(ldai_client: LDAIClient): + """Test that disabled agents are properly handled in single agent method.""" + context = Context.create('user-key') + config = LDAIAgentConfig( + key='disabled-agent', + default_value=LDAIAgentDefaults(enabled=False), + variables={} + ) + + agent = ldai_client.agent(config, context) + + assert agent.enabled is False + assert agent.tracker is not None + + +def test_disabled_agent_multiple_method(ldai_client: LDAIClient): + """Test that disabled agents are properly handled in multiple agents method.""" + context = Context.create('user-key') + + agent_configs = [ + LDAIAgentConfig( + key='disabled-agent', + default_value=LDAIAgentDefaults(enabled=False), + variables={} + ) + ] + + agents = ldai_client.agents(agent_configs, context) + + assert len(agents) == 1 + assert agents['disabled-agent'].enabled is False + + +def test_agent_with_missing_metadata(ldai_client: LDAIClient): + """Test agent handling when metadata is minimal or missing.""" + context = Context.create('user-key') + config = LDAIAgentConfig( + key='minimal-agent', + default_value=LDAIAgentDefaults( + enabled=False, + model=ModelConfig('default-model'), + instructions="Default instructions" + ) + ) + + agent = ldai_client.agent(config, context) + + assert agent.enabled is True # From flag + assert agent.instructions == 'Minimal agent configuration.' 
+ assert agent.model == config.default_value.model # Falls back to default + assert agent.tracker is not None + + +def test_agent_config_dataclass(): + """Test the LDAIAgentConfig dataclass functionality.""" + config = LDAIAgentConfig( + key='test-agent', + default_value=LDAIAgentDefaults( + enabled=True, + instructions="Test instructions" + ), + variables={'key': 'value'} + ) + + assert config.key == 'test-agent' + assert config.default_value.enabled is True + assert config.default_value.instructions == "Test instructions" + assert config.variables == {'key': 'value'} + + # Test with no variables + config_no_vars = LDAIAgentConfig( + key='test-agent-2', + default_value=LDAIAgentDefaults(enabled=False) + ) + + assert config_no_vars.key == 'test-agent-2' + assert config_no_vars.variables is None diff --git a/packages/core/tests/test_model_config.py b/packages/core/tests/test_model_config.py new file mode 100644 index 0000000..d556c10 --- /dev/null +++ b/packages/core/tests/test_model_config.py @@ -0,0 +1,330 @@ +import pytest +from ldclient import Config, Context, LDClient +from ldclient.integrations.test_data import TestData + +from ldai import AICompletionConfigDefault, LDAIClient, LDMessage, ModelConfig + + +@pytest.fixture +def td() -> TestData: + td = TestData.data_source() + td.update( + td.flag('model-config') + .variations( + { + 'model': {'name': 'fakeModel', 'parameters': {'temperature': 0.5, 'maxTokens': 4096}, 'custom': {'extra-attribute': 'value'}}, + 'provider': {'name': 'fakeProvider'}, + 'messages': [{'role': 'system', 'content': 'Hello, {{name}}!'}], + '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, + }, + "green", + ) + .variation_for_all(0) + ) + + td.update( + td.flag('multiple-messages') + .variations( + { + 'model': {'name': 'fakeModel', 'parameters': {'temperature': 0.7, 'maxTokens': 8192}}, + 'messages': [ + {'role': 'system', 'content': 'Hello, {{name}}!'}, + {'role': 'user', 'content': 'The day is, {{day}}!'}, + ], + 
'_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, + }, + "green", + ) + .variation_for_all(0) + ) + + td.update( + td.flag('ctx-interpolation') + .variations( + { + 'model': {'name': 'fakeModel', 'parameters': {'extra-attribute': 'I can be anything I set my mind/type to'}}, + 'messages': [{'role': 'system', 'content': 'Hello, {{ldctx.name}}! Is your last name {{ldctx.last}}?'}], + '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, + } + ) + .variation_for_all(0) + ) + + td.update( + td.flag('multi-ctx-interpolation') + .variations( + { + 'model': {'name': 'fakeModel', 'parameters': {'extra-attribute': 'I can be anything I set my mind/type to'}}, + 'messages': [{'role': 'system', 'content': 'Hello, {{ldctx.user.name}}! Do you work for {{ldctx.org.shortname}}?'}], + '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, + } + ) + .variation_for_all(0) + ) + + td.update( + td.flag('off-config') + .variations( + { + 'model': {'name': 'fakeModel', 'parameters': {'temperature': 0.1}}, + 'messages': [{'role': 'system', 'content': 'Hello, {{name}}!'}], + '_ldMeta': {'enabled': False, 'variationKey': 'abcd', 'version': 1}, + } + ) + .variation_for_all(0) + ) + + td.update( + td.flag('initial-config-disabled') + .variations( + { + '_ldMeta': {'enabled': False}, + }, + { + '_ldMeta': {'enabled': True}, + } + ) + .variation_for_all(0) + ) + + td.update( + td.flag('initial-config-enabled') + .variations( + { + '_ldMeta': {'enabled': False}, + }, + { + '_ldMeta': {'enabled': True}, + } + ) + .variation_for_all(1) + ) + + return td + + +@pytest.fixture +def client(td: TestData) -> LDClient: + config = Config('sdk-key', update_processor_class=td, send_events=False) + return LDClient(config=config) + + +@pytest.fixture +def ldai_client(client: LDClient) -> LDAIClient: + return LDAIClient(client) + + +def test_model_config_delegates_to_properties(): + model = ModelConfig('fakeModel', parameters={'extra-attribute': 'value'}) + assert 
model.name == 'fakeModel' + assert model.get_parameter('extra-attribute') == 'value' + assert model.get_parameter('non-existent') is None + + assert model.name == model.get_parameter('name') + + +def test_model_config_handles_custom(): + model = ModelConfig('fakeModel', custom={'extra-attribute': 'value'}) + assert model.name == 'fakeModel' + assert model.get_parameter('extra-attribute') is None + assert model.get_custom('non-existent') is None + assert model.get_custom('name') is None + + +def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault( + enabled=True, + model=ModelConfig('fakeModel', parameters={'temperature': 0.5, 'maxTokens': 4096}), + messages=[LDMessage(role='system', content='Hello, {{name}}!')], + ) + variables = {'name': 'World'} + + config = ldai_client.config('missing-flag', context, default_value, variables) + + assert config.messages is not None + assert len(config.messages) > 0 + assert config.messages[0].content == 'Hello, World!' + assert config.enabled is True + + assert config.model is not None + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') == 0.5 + assert config.model.get_parameter('maxTokens') == 4096 + + +def test_model_config_interpolation(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault( + enabled=True, + model=ModelConfig('fakeModel'), + messages=[LDMessage(role='system', content='Hello, {{name}}!')], + ) + variables = {'name': 'World'} + + config = ldai_client.config('model-config', context, default_value, variables) + + assert config.messages is not None + assert len(config.messages) > 0 + assert config.messages[0].content == 'Hello, World!' 
+ assert config.enabled is True + + assert config.model is not None + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') == 0.5 + assert config.model.get_parameter('maxTokens') == 4096 + + +def test_model_config_no_variables(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) + + config = ldai_client.config('model-config', context, default_value, {}) + + assert config.messages is not None + assert len(config.messages) > 0 + assert config.messages[0].content == 'Hello, !' + assert config.enabled is True + + assert config.model is not None + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') == 0.5 + assert config.model.get_parameter('maxTokens') == 4096 + + +def test_provider_config_handling(ldai_client: LDAIClient): + context = Context.builder('user-key').name("Sandy").build() + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) + variables = {'name': 'World'} + + config = ldai_client.config('model-config', context, default_value, variables) + + assert config.provider is not None + assert config.provider.name == 'fakeProvider' + + +def test_context_interpolation(ldai_client: LDAIClient): + context = Context.builder('user-key').name("Sandy").set('last', 'Beaches').build() + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) + variables = {'name': 'World'} + + config = ldai_client.config( + 'ctx-interpolation', context, default_value, variables + ) + + assert config.messages is not None + assert len(config.messages) > 0 + assert config.messages[0].content == 'Hello, Sandy! Is your last name Beaches?' 
+ assert config.enabled is True + + assert config.model is not None + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') is None + assert config.model.get_parameter('maxTokens') is None + assert config.model.get_parameter('extra-attribute') == 'I can be anything I set my mind/type to' + + +def test_multi_context_interpolation(ldai_client: LDAIClient): + user_context = Context.builder('user-key').name("Sandy").build() + org_context = Context.builder('org-key').kind('org').name("LaunchDarkly").set('shortname', 'LD').build() + context = Context.multi_builder().add(user_context).add(org_context).build() + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) + variables = {'name': 'World'} + + config = ldai_client.config( + 'multi-ctx-interpolation', context, default_value, variables + ) + + assert config.messages is not None + assert len(config.messages) > 0 + assert config.messages[0].content == 'Hello, Sandy! Do you work for LD?' + assert config.enabled is True + + assert config.model is not None + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') is None + assert config.model.get_parameter('maxTokens') is None + assert config.model.get_parameter('extra-attribute') == 'I can be anything I set my mind/type to' + + +def test_model_config_multiple(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) + variables = {'name': 'World', 'day': 'Monday'} + + config = ldai_client.config( + 'multiple-messages', context, default_value, variables + ) + + assert config.messages is not None + assert len(config.messages) > 0 + assert config.messages[0].content == 'Hello, World!' + assert config.messages[1].content == 'The day is, Monday!' 
+ assert config.enabled is True + + assert config.model is not None + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') == 0.7 + assert config.model.get_parameter('maxTokens') == 8192 + + +def test_model_config_disabled(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) + + config = ldai_client.config('off-config', context, default_value, {}) + + assert config.model is not None + assert config.enabled is False + assert config.model.name == 'fakeModel' + assert config.model.get_parameter('temperature') == 0.1 + assert config.model.get_parameter('maxTokens') is None + + +def test_model_initial_config_disabled(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) + + config = ldai_client.config('initial-config-disabled', context, default_value, {}) + + assert config.enabled is False + assert config.model is None + assert config.messages is None + assert config.provider is None + + +def test_model_initial_config_enabled(ldai_client: LDAIClient): + context = Context.create('user-key') + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) + + config = ldai_client.config('initial-config-enabled', context, default_value, {}) + + assert config.enabled is True + assert config.model is None + assert config.messages is None + assert config.provider is None + + +def test_config_method_tracking(ldai_client: LDAIClient): + from unittest.mock import Mock + + mock_client = Mock() + mock_client.variation.return_value = { + '_ldMeta': {'enabled': True, 'variationKey': 'test-variation', 'version': 1}, + 'model': {'name': 'test-model'}, + 'provider': {'name': 'test-provider'}, + 'messages': [] + } + + client = LDAIClient(mock_client) + context = 
Context.create('user-key') + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) + + config = client.config('test-config-key', context, default_value) + + mock_client.track.assert_called_once_with( + '$ld:ai:config:function:single', + context, + 'test-config-key', + 1 + ) diff --git a/packages/core/tests/test_tracker.py b/packages/core/tests/test_tracker.py new file mode 100644 index 0000000..2e39d98 --- /dev/null +++ b/packages/core/tests/test_tracker.py @@ -0,0 +1,444 @@ +from time import sleep +from unittest.mock import MagicMock, call + +import pytest +from ldclient import Config, Context, LDClient +from ldclient.integrations.test_data import TestData + +from ldai.tracker import FeedbackKind, LDAIConfigTracker, TokenUsage + + +@pytest.fixture +def td() -> TestData: + td = TestData.data_source() + td.update( + td.flag("model-config") + .variations( + { + "model": { + "name": "fakeModel", + "parameters": {"temperature": 0.5, "maxTokens": 4096}, + "custom": {"extra-attribute": "value"}, + }, + "provider": {"name": "fakeProvider"}, + "messages": [{"role": "system", "content": "Hello, {{name}}!"}], + "_ldMeta": {"enabled": True, "variationKey": "abcd", "version": 1}, + }, + "green", + ) + .variation_for_all(0) + ) + + return td + + +@pytest.fixture +def client(td: TestData) -> LDClient: + config = Config("sdk-key", update_processor_class=td, send_events=False) + client = LDClient(config=config) + client.track = MagicMock() # type: ignore + return client + + +def test_summary_starts_empty(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 1, "fakeModel", "fakeProvider", context) + + assert tracker.get_summary().duration is None + assert tracker.get_summary().feedback is None + assert tracker.get_summary().success is None + assert tracker.get_summary().usage is None + + +def test_tracks_duration(client: LDClient): + context = 
Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + tracker.track_duration(100) + + client.track.assert_called_with( # type: ignore + "$ld:ai:duration:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 100, + ) + + assert tracker.get_summary().duration == 100 + + +def test_tracks_duration_of(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + tracker.track_duration_of(lambda: sleep(0.01)) + + calls = client.track.mock_calls # type: ignore + + assert len(calls) == 1 + assert calls[0].args[0] == "$ld:ai:duration:total" + assert calls[0].args[1] == context + assert calls[0].args[2] == { + "variationKey": "variation-key", + "configKey": "config-key", + "version": 3, + "modelName": "fakeModel", + "providerName": "fakeProvider", + } + assert calls[0].args[3] == pytest.approx(10, rel=10) + + +def test_tracks_time_to_first_token(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + tracker.track_time_to_first_token(100) + + client.track.assert_called_with( # type: ignore + "$ld:ai:tokens:ttf", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 100, + ) + + assert tracker.get_summary().time_to_first_token == 100 + + +def test_tracks_duration_of_with_exception(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + def sleep_and_throw(): + sleep(0.01) + raise ValueError("Something went wrong") + + try: + tracker.track_duration_of(sleep_and_throw) 
+ assert False, "Should have thrown an exception" + except ValueError: + pass + + calls = client.track.mock_calls # type: ignore + + assert len(calls) == 1 + assert calls[0].args[0] == "$ld:ai:duration:total" + assert calls[0].args[1] == context + assert calls[0].args[2] == { + "variationKey": "variation-key", + "configKey": "config-key", + "version": 3, + "modelName": "fakeModel", + "providerName": "fakeProvider", + } + assert calls[0].args[3] == pytest.approx(10, rel=10) + + +def test_tracks_token_usage(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + tokens = TokenUsage(300, 200, 100) + tracker.track_tokens(tokens) + + calls = [ + call( + "$ld:ai:tokens:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 300, + ), + call( + "$ld:ai:tokens:input", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 200, + ), + call( + "$ld:ai:tokens:output", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 100, + ), + ] + + client.track.assert_has_calls(calls) # type: ignore + + assert tracker.get_summary().usage == tokens + + +def test_tracks_bedrock_metrics(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + bedrock_result = { + "ResponseMetadata": {"HTTPStatusCode": 200}, + "usage": { + "inputTokens": 220, + "outputTokens": 110, + "totalTokens": 330, + }, + "metrics": { + "latencyMs": 50, + }, + } + tracker.track_bedrock_converse_metrics(bedrock_result) + + calls = [ + call( + "$ld:ai:generation:success", + context, + {"variationKey": 
"variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + call( + "$ld:ai:duration:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 50, + ), + call( + "$ld:ai:tokens:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 330, + ), + call( + "$ld:ai:tokens:input", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 220, + ), + call( + "$ld:ai:tokens:output", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 110, + ), + ] + + client.track.assert_has_calls(calls) # type: ignore + + assert tracker.get_summary().success is True + assert tracker.get_summary().duration == 50 + assert tracker.get_summary().usage == TokenUsage(330, 220, 110) + + +def test_tracks_bedrock_metrics_with_error(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + bedrock_result = { + "ResponseMetadata": {"HTTPStatusCode": 500}, + "usage": { + "totalTokens": 330, + "inputTokens": 220, + "outputTokens": 110, + }, + "metrics": { + "latencyMs": 50, + }, + } + tracker.track_bedrock_converse_metrics(bedrock_result) + + calls = [ + call( + "$ld:ai:generation:error", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + call( + "$ld:ai:duration:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 50, + ), + call( + 
"$ld:ai:tokens:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 330, + ), + call( + "$ld:ai:tokens:input", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 220, + ), + call( + "$ld:ai:tokens:output", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 110, + ), + ] + + client.track.assert_has_calls(calls) # type: ignore + + assert tracker.get_summary().success is False + assert tracker.get_summary().duration == 50 + assert tracker.get_summary().usage == TokenUsage(330, 220, 110) + + +@pytest.mark.asyncio +async def test_tracks_openai_metrics(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + class Result: + def __init__(self): + self.usage = Usage() + + class Usage: + def to_dict(self): + return { + "total_tokens": 330, + "prompt_tokens": 220, + "completion_tokens": 110, + } + + async def get_result(): + return Result() + + await tracker.track_openai_metrics(get_result) + + calls = [ + call( + "$ld:ai:generation:success", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + call( + "$ld:ai:tokens:total", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 330, + ), + call( + "$ld:ai:tokens:input", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 220, + ), + call( + "$ld:ai:tokens:output", + context, + {"variationKey": "variation-key", "configKey": 
"config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 110, + ), + ] + + client.track.assert_has_calls(calls, any_order=False) # type: ignore + + assert tracker.get_summary().usage == TokenUsage(330, 220, 110) + + +@pytest.mark.asyncio +async def test_tracks_openai_metrics_with_exception(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + async def raise_exception(): + raise ValueError("Something went wrong") + + try: + await tracker.track_openai_metrics(raise_exception) + assert False, "Should have thrown an exception" + except ValueError: + pass + + calls = [ + call( + "$ld:ai:generation:error", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + ] + + client.track.assert_has_calls(calls, any_order=False) # type: ignore + + assert tracker.get_summary().usage is None + + +@pytest.mark.parametrize( + "kind,label", + [ + pytest.param(FeedbackKind.Positive, "positive", id="positive"), + pytest.param(FeedbackKind.Negative, "negative", id="negative"), + ], +) +def test_tracks_feedback(client: LDClient, kind: FeedbackKind, label: str): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + + tracker.track_feedback({"kind": kind}) + + client.track.assert_called_with( # type: ignore + f"$ld:ai:feedback:user:{label}", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ) + assert tracker.get_summary().feedback == {"kind": kind} + + +def test_tracks_success(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + 
tracker.track_success() + + calls = [ + call( + "$ld:ai:generation:success", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + ] + + client.track.assert_has_calls(calls) # type: ignore + + assert tracker.get_summary().success is True + + +def test_tracks_error(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + tracker.track_error() + + calls = [ + call( + "$ld:ai:generation:error", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + ] + + client.track.assert_has_calls(calls) # type: ignore + + assert tracker.get_summary().success is False + + +def test_error_overwrites_success(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) + tracker.track_success() + tracker.track_error() + + calls = [ + call( + "$ld:ai:generation:success", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + call( + "$ld:ai:generation:error", + context, + {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, + 1, + ), + ] + + client.track.assert_has_calls(calls) # type: ignore + + assert tracker.get_summary().success is False diff --git a/packages/langchain/README.md b/packages/langchain/README.md new file mode 100644 index 0000000..06fd329 --- /dev/null +++ b/packages/langchain/README.md @@ -0,0 +1,58 @@ +# LaunchDarkly AI SDK - LangChain Provider + +This package provides LangChain provider support for the LaunchDarkly AI SDK. 
+ +## Installation + +```bash +pip install launchdarkly-server-sdk-ai-langchain +``` + +This will automatically install the core SDK (`launchdarkly-server-sdk-ai`) and LangChain dependencies. + +## Usage + +```python +from ldclient import init, Context +from ldai import init_ai + +# Initialize clients +ld_client = init('your-sdk-key') +ai_client = init_ai(ld_client) + +# Create a chat - will automatically use LangChain provider +context = Context.create('user-key') +chat = await ai_client.create_chat('chat-config', context, { + 'enabled': True, + 'provider': {'name': 'openai'}, + 'model': {'name': 'gpt-4'} +}) + +if chat: + response = await chat.invoke('Hello!') + print(response.message.content) +``` + +## Supported LangChain Providers + +This provider supports any LangChain-compatible model, including: +- OpenAI (GPT-3.5, GPT-4, etc.) +- Anthropic (Claude) +- Google (Gemini) +- And many more through LangChain integrations + +## Requirements + +- Python 3.9+ +- launchdarkly-server-sdk-ai >= 0.10.1 +- langchain >= 0.3.0 +- langchain-core >= 0.3.0 + +## Documentation + +For full documentation, visit: https://docs.launchdarkly.com/sdk/ai/python + +## License + +Apache-2.0 + diff --git a/packages/langchain/ldai/providers/langchain/__init__.py b/packages/langchain/ldai/providers/langchain/__init__.py new file mode 100644 index 0000000..f2e2c35 --- /dev/null +++ b/packages/langchain/ldai/providers/langchain/__init__.py @@ -0,0 +1,284 @@ +"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" + +from typing import Any, Dict, List, Optional + +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse +from ldai.tracker import TokenUsage + + +class LangChainProvider(AIProvider): + 
""" + LangChain implementation of AIProvider. + + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. + """ + + def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): + """ + Initialize the LangChain provider. + + :param llm: LangChain BaseChatModel instance + :param logger: Optional logger for logging provider operations + """ + super().__init__(logger) + self._llm = llm + + # ============================================================================= + # MAIN FACTORY METHOD + # ============================================================================= + + @staticmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': + """ + Static factory method to create a LangChain AIProvider from an AI configuration. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured LangChainProvider instance + """ + llm = await LangChainProvider.create_langchain_model(ai_config) + return LangChainProvider(llm, logger) + + # ============================================================================= + # INSTANCE METHODS (AIProvider Implementation) + # ============================================================================= + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the LangChain model with an array of messages. 
+ + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response + response: AIMessage = await self._llm.ainvoke(langchain_messages) + + # Generate metrics early (assumes success by default) + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + # Extract text content from the response + content: str = '' + if isinstance(response.content, str): + content = response.content + else: + # Log warning for non-string content (likely multimodal) + if self.logger: + self.logger.warn( + f"Multimodal response not supported, expecting a string. " + f"Content type: {type(response.content)}, Content: {response.content}" + ) + # Update metrics to reflect content loss + metrics.success = False + + # Create the assistant message + from ldai.models import LDMessage + assistant_message = LDMessage(role='assistant', content=content) + + return ChatResponse( + message=assistant_message, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain model invocation failed: {error}') + + from ldai.models import LDMessage + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the LangChain model with structured output support. 
+ + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response with structured output + # Note: with_structured_output is available on BaseChatModel in newer LangChain versions + if hasattr(self._llm, 'with_structured_output'): + structured_llm = self._llm.with_structured_output(response_structure) + response = await structured_llm.ainvoke(langchain_messages) + else: + # Fallback: invoke normally and try to parse as JSON + response_obj = await self._llm.ainvoke(langchain_messages) + if isinstance(response_obj, AIMessage): + import json + try: + response = json.loads(response_obj.content) + except json.JSONDecodeError: + response = {'content': response_obj.content} + else: + response = response_obj + + # Using structured output doesn't support metrics + metrics = LDAIMetrics( + success=True, + usage=TokenUsage(total=0, input=0, output=0), + ) + + import json + return StructuredResponse( + data=response if isinstance(response, dict) else {'result': response}, + raw_response=json.dumps(response) if not isinstance(response, str) else response, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain structured model invocation failed: {error}') + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics( + success=False, + usage=TokenUsage(total=0, input=0, output=0), + ), + ) + + def get_chat_model(self) -> BaseChatModel: + """ + Get the underlying LangChain model instance. 
+ + :return: The LangChain BaseChatModel instance + """ + return self._llm + + # ============================================================================= + # STATIC UTILITY METHODS + # ============================================================================= + + @staticmethod + def map_provider(ld_provider_name: str) -> str: + """ + Map LaunchDarkly provider names to LangChain provider names. + + This method enables seamless integration between LaunchDarkly's standardized + provider naming and LangChain's naming conventions. + + :param ld_provider_name: LaunchDarkly provider name + :return: LangChain provider name + """ + lowercased_name = ld_provider_name.lower() + + mapping: Dict[str, str] = { + 'gemini': 'google-genai', + } + + return mapping.get(lowercased_name, lowercased_name) + + @staticmethod + def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: + """ + Get AI metrics from a LangChain provider response. + + This method extracts token usage information and success status from LangChain responses + and returns a LaunchDarkly LDAIMetrics object. 
+ + :param response: The response from the LangChain model + :return: LDAIMetrics with success status and token usage + """ + # Extract token usage if available + usage: Optional[TokenUsage] = None + if hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get('token_usage') + if token_usage: + usage = TokenUsage( + total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, + input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, + output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, + ) + + # LangChain responses that complete successfully are considered successful by default + return LDAIMetrics(success=True, usage=usage) + + @staticmethod + def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: + """ + Convert LaunchDarkly messages to LangChain messages. + + This helper method enables developers to work directly with LangChain message types + while maintaining compatibility with LaunchDarkly's standardized message format. + + :param messages: List of LDMessage objects + :return: List of LangChain message objects + """ + result: List[BaseMessage] = [] + for msg in messages: + if msg.role == 'system': + result.append(SystemMessage(content=msg.content)) + elif msg.role == 'user': + result.append(HumanMessage(content=msg.content)) + elif msg.role == 'assistant': + result.append(AIMessage(content=msg.content)) + else: + raise ValueError(f'Unsupported message role: {msg.role}') + return result + + @staticmethod + async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: + """ + Create a LangChain model from an AI configuration. + + This public helper method enables developers to initialize their own LangChain models + using LaunchDarkly AI configurations. 
+ + :param ai_config: The LaunchDarkly AI configuration + :return: A configured LangChain BaseChatModel + """ + model_name = ai_config.model.name if ai_config.model else '' + provider = ai_config.provider.name if ai_config.provider else '' + parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} + if not isinstance(parameters, dict): + parameters = {} + + # Use LangChain's init_chat_model to support multiple providers + # Note: This requires langchain package to be installed + try: + # Try to import init_chat_model from langchain.chat_models + # This is available in langchain >= 0.1.0 + try: + from langchain.chat_models import init_chat_model + except ImportError: + # Fallback for older versions or different import path + from langchain.chat_models.universal import init_chat_model + + # Map provider name + langchain_provider = LangChainProvider.map_provider(provider) + + # Create model configuration + model_kwargs = {**parameters} + if langchain_provider: + model_kwargs['model_provider'] = langchain_provider + + # Initialize the chat model (init_chat_model may be async or sync) + result = init_chat_model(model_name, **model_kwargs) + # Handle both sync and async initialization + if hasattr(result, '__await__'): + return await result + return result + except ImportError as e: + raise ImportError( + 'langchain package is required for LangChainProvider. 
' + 'Install it with: pip install langchain langchain-core' + ) from e + diff --git a/packages/langchain/pyproject.toml b/packages/langchain/pyproject.toml new file mode 100644 index 0000000..33d2b3c --- /dev/null +++ b/packages/langchain/pyproject.toml @@ -0,0 +1,40 @@ +[tool.poetry] +name = "launchdarkly-server-sdk-ai-langchain" +version = "0.1.0" +description = "LangChain provider for LaunchDarkly AI SDK" +authors = ["LaunchDarkly "] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://docs.launchdarkly.com/sdk/ai/python" +repository = "https://github.com/launchdarkly/python-server-sdk-ai" +documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", +] +packages = [ { include = "ldai" } ] + +[tool.poetry.dependencies] +python = ">=3.9,<4" +launchdarkly-server-sdk-ai = { path = "../core", develop = true } +langchain = ">=0.3.0,<2.0" +langchain-core = ">=0.3.0,<2.0" + +[tool.poetry.group.dev.dependencies] +pytest = ">=2.8" +pytest-cov = ">=2.4.0" +pytest-asyncio = ">=0.21.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + diff --git a/packages/langchain/tests/__init__.py b/packages/langchain/tests/__init__.py new file mode 100644 index 0000000..8a69638 --- /dev/null +++ b/packages/langchain/tests/__init__.py @@ -0,0 +1,2 @@ +"""Tests for LaunchDarkly Server SDK for AI - LangChain provider.""" + diff --git a/packages/langchain/tests/test_langchain_provider.py 
b/packages/langchain/tests/test_langchain_provider.py new file mode 100644 index 0000000..3bb83a1 --- /dev/null +++ b/packages/langchain/tests/test_langchain_provider.py @@ -0,0 +1,237 @@ +"""Tests for LangChain provider implementation.""" + +import pytest +from unittest.mock import AsyncMock, Mock + +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + +from ldai.models import LDMessage +from ldai.providers.langchain import LangChainProvider +from ldai.tracker import TokenUsage + + +class TestMessageConversion: + """Test conversion between LD messages and LangChain messages.""" + + def test_convert_multiple_messages(self): + """Test converting a conversation with all message types.""" + ld_messages = [ + LDMessage(role='system', content='You are helpful'), + LDMessage(role='user', content='Hello'), + LDMessage(role='assistant', content='Hi there!'), + ] + lc_messages = LangChainProvider.convert_messages_to_langchain(ld_messages) + + assert len(lc_messages) == 3 + assert isinstance(lc_messages[0], SystemMessage) + assert isinstance(lc_messages[1], HumanMessage) + assert isinstance(lc_messages[2], AIMessage) + assert lc_messages[0].content == 'You are helpful' + assert lc_messages[1].content == 'Hello' + assert lc_messages[2].content == 'Hi there!' 
+ + def test_convert_unsupported_role_raises_error(self): + """Test that unsupported message roles raise ValueError.""" + ld_messages = [LDMessage(role='function', content='Function result')] + + with pytest.raises(ValueError, match='Unsupported message role: function'): + LangChainProvider.convert_messages_to_langchain(ld_messages) + + +class TestMetricsExtraction: + """Test metrics extraction from LangChain response metadata.""" + + def test_extract_metrics_with_token_usage(self): + """Test extracting token usage from response metadata.""" + response = AIMessage( + content='Hello, world!', + response_metadata={ + 'token_usage': { + 'total_tokens': 100, + 'prompt_tokens': 60, + 'completion_tokens': 40, + } + } + ) + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is not None + assert metrics.usage.total == 100 + assert metrics.usage.input == 60 + assert metrics.usage.output == 40 + + def test_extract_metrics_with_camel_case_token_usage(self): + """Test extracting token usage with camelCase keys (some providers use this).""" + response = AIMessage( + content='Hello, world!', + response_metadata={ + 'token_usage': { + 'totalTokens': 150, + 'promptTokens': 90, + 'completionTokens': 60, + } + } + ) + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is not None + assert metrics.usage.total == 150 + assert metrics.usage.input == 90 + assert metrics.usage.output == 60 + + def test_extract_metrics_without_token_usage(self): + """Test metrics extraction when no token usage is available.""" + response = AIMessage(content='Hello, world!') + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is None + + +class TestInvokeModel: + """Test model invocation with LangChain provider.""" + + @pytest.mark.asyncio + async def 
test_invoke_model_success(self): + """Test successful model invocation.""" + mock_llm = AsyncMock() + mock_response = AIMessage( + content='Hello, user!', + response_metadata={ + 'token_usage': { + 'total_tokens': 20, + 'prompt_tokens': 10, + 'completion_tokens': 10, + } + } + ) + mock_llm.ainvoke.return_value = mock_response + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='Hello')] + + response = await provider.invoke_model(messages) + + assert response.message.role == 'assistant' + assert response.message.content == 'Hello, user!' + assert response.metrics.success is True + assert response.metrics.usage is not None + assert response.metrics.usage.total == 20 + + @pytest.mark.asyncio + async def test_invoke_model_with_multimodal_content_warning(self): + """Test that non-string content triggers warning and marks as failure.""" + mock_llm = AsyncMock() + mock_response = AIMessage( + content=['text', {'type': 'image'}], # Non-string content + response_metadata={'token_usage': {'total_tokens': 20}} + ) + mock_llm.ainvoke.return_value = mock_response + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Describe this image')] + + response = await provider.invoke_model(messages) + + # Should warn about multimodal content + mock_logger.warn.assert_called_once() + assert 'Multimodal response not supported' in str(mock_logger.warn.call_args) + + # Should mark as failure + assert response.metrics.success is False + assert response.message.content == '' + + @pytest.mark.asyncio + async def test_invoke_model_with_exception(self): + """Test model invocation handles exceptions gracefully.""" + mock_llm = AsyncMock() + mock_llm.ainvoke.side_effect = Exception('Model API error') + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Hello')] + + response = await 
provider.invoke_model(messages) + + # Should log the error + mock_logger.warn.assert_called_once() + assert 'LangChain model invocation failed' in str(mock_logger.warn.call_args) + + # Should return failure response + assert response.message.role == 'assistant' + assert response.message.content == '' + assert response.metrics.success is False + assert response.metrics.usage is None + + +class TestInvokeStructuredModel: + """Test structured output invocation.""" + + @pytest.mark.asyncio + async def test_invoke_structured_model_with_support(self): + """Test structured output when model supports with_structured_output.""" + mock_llm = Mock() + mock_structured_llm = AsyncMock() + mock_structured_llm.ainvoke.return_value = { + 'answer': 'Paris', + 'confidence': 0.95 + } + mock_llm.with_structured_output.return_value = mock_structured_llm + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='What is the capital of France?')] + schema = {'answer': 'string', 'confidence': 'number'} + + response = await provider.invoke_structured_model(messages, schema) + + assert response.data == {'answer': 'Paris', 'confidence': 0.95} + assert response.metrics.success is True + mock_llm.with_structured_output.assert_called_once_with(schema) + + @pytest.mark.asyncio + async def test_invoke_structured_model_without_support_json_fallback(self): + """Test structured output fallback to JSON parsing when not supported.""" + mock_llm = AsyncMock() + # Model doesn't have with_structured_output + delattr(mock_llm, 'with_structured_output') if hasattr(mock_llm, 'with_structured_output') else None + + mock_response = AIMessage(content='{"answer": "Berlin", "confidence": 0.9}') + mock_llm.ainvoke.return_value = mock_response + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='What is the capital of Germany?')] + schema = {'answer': 'string', 'confidence': 'number'} + + response = await provider.invoke_structured_model(messages, 
schema) + + assert response.data == {'answer': 'Berlin', 'confidence': 0.9} + assert response.metrics.success is True + + @pytest.mark.asyncio + async def test_invoke_structured_model_with_exception(self): + """Test structured output handles exceptions gracefully.""" + mock_llm = Mock() + mock_llm.with_structured_output.side_effect = Exception('Structured output error') + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Question')] + schema = {'answer': 'string'} + + response = await provider.invoke_structured_model(messages, schema) + + # Should log the error + mock_logger.warn.assert_called_once() + assert 'LangChain structured model invocation failed' in str(mock_logger.warn.call_args) + + # Should return failure response + assert response.data == {} + assert response.raw_response == '' + assert response.metrics.success is False + diff --git a/release-please-config.json b/release-please-config.json index 78df6d7..1de9de1 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -1,12 +1,24 @@ { + "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json", "packages": { - ".": { + "packages/core": { "release-type": "python", + "package-name": "launchdarkly-server-sdk-ai", "versioning": "default", "bump-minor-pre-major": true, "include-v-in-tag": false, - "extra-files": ["ldai/__init__.py", "PROVENANCE.md"], - "include-component-in-tag": false + "extra-files": ["packages/core/ldai/__init__.py", "packages/core/PROVENANCE.md"], + "include-component-in-tag": true, + "component": "core" + }, + "packages/langchain": { + "release-type": "python", + "package-name": "launchdarkly-server-sdk-ai-langchain", + "versioning": "default", + "bump-minor-pre-major": true, + "include-v-in-tag": false, + "include-component-in-tag": true, + "component": "langchain" } } } From 13849c085399fc1bb812ae613575f4bcd9d4bd75 Mon Sep 17 00:00:00 2001 From: 
Edwin Okonkwo Date: Wed, 19 Nov 2025 09:41:07 +0100 Subject: [PATCH 19/37] Linting. --- packages/core/ldai/__init__.py | 35 +++++------------ packages/core/ldai/chat/__init__.py | 1 - packages/core/ldai/chat/tracked_chat.py | 35 +++++++++-------- packages/core/ldai/client.py | 38 ++++++++----------- packages/core/ldai/judge/__init__.py | 2 - packages/core/ldai/judge/ai_judge.py | 26 ++++++------- .../ldai/judge/evaluation_schema_builder.py | 7 ++-- packages/core/ldai/models.py | 12 +++--- packages/core/ldai/providers/ai_provider.py | 27 +++++++------ .../ldai/providers/ai_provider_factory.py | 14 +++---- packages/core/ldai/providers/types.py | 1 - packages/core/ldai/tracker.py | 6 +-- 12 files changed, 84 insertions(+), 120 deletions(-) diff --git a/packages/core/ldai/__init__.py b/packages/core/ldai/__init__.py index 1363115..2e40fb6 100644 --- a/packages/core/ldai/__init__.py +++ b/packages/core/ldai/__init__.py @@ -5,38 +5,21 @@ # to extend ldai.providers.* even though ldai itself has an __init__.py import sys from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) +# Export chat +from ldai.chat import TrackedChat # Export main client from ldai.client import LDAIClient - -# Export models for convenience -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, - # Deprecated aliases for backward compatibility - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, -) - # Export judge from ldai.judge import AIJudge - -# Export chat -from ldai.chat import TrackedChat - +# Export models for convenience +from ldai.models import ( # Deprecated aliases for backward compatibility + AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, + AICompletionConfig, AICompletionConfigDefault, AIConfig, AIJudgeConfig, + 
AIJudgeConfigDefault, JudgeConfiguration, LDAIAgent, LDAIAgentConfig, + LDAIAgentDefaults, LDMessage, ModelConfig, ProviderConfig) # Export judge types from ldai.providers.types import EvalScore, JudgeResponse diff --git a/packages/core/ldai/chat/__init__.py b/packages/core/ldai/chat/__init__.py index ffef9ab..265a1b3 100644 --- a/packages/core/ldai/chat/__init__.py +++ b/packages/core/ldai/chat/__init__.py @@ -3,4 +3,3 @@ from ldai.chat.tracked_chat import TrackedChat __all__ = ['TrackedChat'] - diff --git a/packages/core/ldai/chat/tracked_chat.py b/packages/core/ldai/chat/tracked_chat.py index 037fd9a..0fc9873 100644 --- a/packages/core/ldai/chat/tracked_chat.py +++ b/packages/core/ldai/chat/tracked_chat.py @@ -3,10 +3,10 @@ import asyncio from typing import Any, Dict, List, Optional +from ldai.judge import AIJudge from ldai.models import AICompletionConfig, LDMessage from ldai.providers.ai_provider import AIProvider from ldai.providers.types import ChatResponse, JudgeResponse -from ldai.judge import AIJudge from ldai.tracker import LDAIConfigTracker @@ -14,7 +14,7 @@ class TrackedChat: """ Concrete implementation of TrackedChat that provides chat functionality by delegating to an AIProvider implementation. - + This class handles conversation management and tracking, while delegating the actual model invocation to the provider. """ @@ -29,7 +29,7 @@ def __init__( ): """ Initialize the TrackedChat. - + :param ai_config: The completion AI configuration :param tracker: The tracker for the completion configuration :param provider: The AI provider to use for chat @@ -46,9 +46,9 @@ def __init__( async def invoke(self, prompt: str) -> ChatResponse: """ Invoke the chat model with a prompt string. - + This method handles conversation management and tracking, delegating to the provider's invoke_model method. 
- + :param prompt: The user prompt to send to the chat model :return: ChatResponse containing the model's response and metrics """ @@ -83,9 +83,9 @@ def _start_judge_evaluations( ) -> List[asyncio.Task[Optional[JudgeResponse]]]: """ Start judge evaluations as async tasks without awaiting them. - + Returns a list of async tasks that can be awaited later. - + :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated :return: List of async tasks that will return judge evaluation results @@ -119,13 +119,13 @@ async def evaluate_judge(judge_config): asyncio.create_task(evaluate_judge(judge_config)) for judge_config in judge_configs ] - + return tasks def get_config(self) -> AICompletionConfig: """ Get the underlying AI configuration used to initialize this TrackedChat. - + :return: The AI completion configuration """ return self._ai_config @@ -133,7 +133,7 @@ def get_config(self) -> AICompletionConfig: def get_tracker(self) -> LDAIConfigTracker: """ Get the underlying AI configuration tracker used to initialize this TrackedChat. - + :return: The tracker instance """ return self._tracker @@ -141,9 +141,9 @@ def get_tracker(self) -> LDAIConfigTracker: def get_provider(self) -> AIProvider: """ Get the underlying AI provider instance. - + This provides direct access to the provider for advanced use cases. - + :return: The AI provider instance """ return self._provider @@ -151,9 +151,9 @@ def get_provider(self) -> AIProvider: def get_judges(self) -> Dict[str, AIJudge]: """ Get the judges associated with this TrackedChat. - + Returns a dictionary of judge instances keyed by their configuration keys. - + :return: Dictionary of judge instances """ return self._judges @@ -161,10 +161,10 @@ def get_judges(self) -> Dict[str, AIJudge]: def append_messages(self, messages: List[LDMessage]) -> None: """ Append messages to the conversation history. 
- + Adds messages to the conversation history without invoking the model, which is useful for managing multi-turn conversations or injecting context. - + :param messages: Array of messages to append to the conversation history """ self._messages.extend(messages) @@ -172,7 +172,7 @@ def append_messages(self, messages: List[LDMessage]) -> None: def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: """ Get all messages in the conversation history. - + :param include_config_messages: Whether to include the config messages from the AIConfig. Defaults to False. :return: Array of messages. When include_config_messages is True, returns both config @@ -183,4 +183,3 @@ def get_messages(self, include_config_messages: bool = False) -> List[LDMessage] config_messages = self._ai_config.messages or [] return config_messages + self._messages return list(self._messages) - diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index 4f4b6b9..086e99b 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -7,21 +7,13 @@ from ldai.chat import TrackedChat from ldai.judge import AIJudge -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, -) -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider +from ldai.models import (AIAgentConfig, AIAgentConfigDefault, + AIAgentConfigRequest, AIAgents, AICompletionConfig, + AICompletionConfigDefault, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDMessage, + ModelConfig, ProviderConfig) +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) from ldai.tracker import LDAIConfigTracker @@ -204,7 +196,7 @@ async def _initialize_judges( ) -> Dict[str, AIJudge]: """ Initialize judges from judge 
configurations. - + :param judge_configs: List of judge configurations :param context: Standard Context used when evaluating flags :param variables: Dictionary of values for instruction interpolation @@ -212,7 +204,7 @@ async def _initialize_judges( :return: Dictionary of judge instances keyed by their configuration keys """ judges: Dict[str, AIJudge] = {} - + async def create_judge_for_config(judge_key: str): judge = await self.create_judge( judge_key, @@ -222,22 +214,22 @@ async def create_judge_for_config(judge_key: str): default_ai_provider, ) return judge_key, judge - + judge_promises = [ create_judge_for_config(judge_config.key) for judge_config in judge_configs ] - + import asyncio results = await asyncio.gather(*judge_promises, return_exceptions=True) - + for result in results: if isinstance(result, Exception): continue - judge_key, judge = result + judge_key, judge = result # type: ignore[misc] if judge: judges[judge_key] = judge - + return judges async def create_chat( @@ -275,7 +267,7 @@ async def create_chat( if chat: response = await chat.invoke("I need help with my order") print(response.message.content) - + # Access conversation history messages = chat.get_messages() print(f"Conversation has {len(messages)} messages") diff --git a/packages/core/ldai/judge/__init__.py b/packages/core/ldai/judge/__init__.py index 4ab4df4..0660d0e 100644 --- a/packages/core/ldai/judge/__init__.py +++ b/packages/core/ldai/judge/__init__.py @@ -3,5 +3,3 @@ from ldai.judge.ai_judge import AIJudge __all__ = ['AIJudge'] - - diff --git a/packages/core/ldai/judge/ai_judge.py b/packages/core/ldai/judge/ai_judge.py index 20efbf8..3caad65 100644 --- a/packages/core/ldai/judge/ai_judge.py +++ b/packages/core/ldai/judge/ai_judge.py @@ -5,17 +5,18 @@ import chevron +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder from ldai.models import AIJudgeConfig, LDMessage from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, 
EvalScore, JudgeResponse, StructuredResponse +from ldai.providers.types import (ChatResponse, EvalScore, JudgeResponse, + StructuredResponse) from ldai.tracker import LDAIConfigTracker -from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder class AIJudge: """ Judge implementation that handles evaluation functionality and conversation management. - + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate other AI Configs using structured output. """ @@ -29,7 +30,7 @@ def __init__( ): """ Initialize the Judge. - + :param ai_config: The judge AI configuration :param ai_config_tracker: The tracker for the judge configuration :param ai_provider: The AI provider to use for evaluation @@ -51,7 +52,7 @@ async def evaluate( ) -> Optional[JudgeResponse]: """ Evaluates an AI response using the judge's configuration. - + :param input_text: The input prompt or question that was provided to the AI :param output_text: The AI-generated response to be evaluated :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) @@ -113,7 +114,7 @@ async def evaluate_messages( ) -> Optional[JudgeResponse]: """ Evaluates an AI response from chat messages and response. - + :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) @@ -127,7 +128,7 @@ async def evaluate_messages( def get_ai_config(self) -> AIJudgeConfig: """ Returns the AI Config used by this judge. - + :return: The judge AI configuration """ return self._ai_config @@ -135,7 +136,7 @@ def get_ai_config(self) -> AIJudgeConfig: def get_tracker(self) -> LDAIConfigTracker: """ Returns the tracker associated with this judge. 
- + :return: The tracker for the judge configuration """ return self._ai_config_tracker @@ -143,7 +144,7 @@ def get_tracker(self) -> LDAIConfigTracker: def get_provider(self) -> AIProvider: """ Returns the AI provider used by this judge. - + :return: The AI provider """ return self._ai_provider @@ -151,7 +152,7 @@ def get_provider(self) -> AIProvider: def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: """ Constructs evaluation messages by combining judge's config messages with input/output. - + :param input_text: The input text :param output_text: The output text to evaluate :return: List of messages for evaluation @@ -173,7 +174,7 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: """ Interpolates message content with variables using Mustache templating. - + :param content: The message content template :param variables: Variables to interpolate :return: Interpolated message content @@ -184,7 +185,7 @@ def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: """ Parses the structured evaluation response from the AI provider. 
- + :param data: The structured response data :return: Dictionary of evaluation scores keyed by metric key """ @@ -227,4 +228,3 @@ def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScor results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) return results - diff --git a/packages/core/ldai/judge/evaluation_schema_builder.py b/packages/core/ldai/judge/evaluation_schema_builder.py index 1965e64..c996f08 100644 --- a/packages/core/ldai/judge/evaluation_schema_builder.py +++ b/packages/core/ldai/judge/evaluation_schema_builder.py @@ -13,7 +13,7 @@ class EvaluationSchemaBuilder: def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: """ Build an evaluation response schema from evaluation metric keys. - + :param evaluation_metric_keys: List of evaluation metric keys :return: Schema dictionary for structured output """ @@ -38,7 +38,7 @@ def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: """ Build properties for each evaluation metric key. - + :param evaluation_metric_keys: List of evaluation metric keys :return: Dictionary of properties for each key """ @@ -51,7 +51,7 @@ def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: def _build_key_schema(key: str) -> Dict[str, Any]: """ Build schema for a single evaluation metric key. - + :param key: Evaluation metric key :return: Schema dictionary for the key """ @@ -72,4 +72,3 @@ def _build_key_schema(key: str) -> Dict[str, Any]: 'required': ['score', 'reasoning'], 'additionalProperties': False, } - diff --git a/packages/core/ldai/models.py b/packages/core/ldai/models.py index fa36f8c..c2abe56 100644 --- a/packages/core/ldai/models.py +++ b/packages/core/ldai/models.py @@ -110,7 +110,7 @@ class JudgeConfiguration: """ Configuration for judge attachment to AI Configs. 
""" - + @dataclass(frozen=True) class Judge: """ @@ -127,7 +127,7 @@ def to_dict(self) -> dict: 'key': self.key, 'samplingRate': self.sampling_rate, } - + judges: List['JudgeConfiguration.Judge'] def to_dict(self) -> dict: @@ -347,10 +347,9 @@ class AIAgentConfigRequest: # The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault # The old AIConfig return type had required enabled, so it maps to AICompletionConfig -# Deprecated: Use AICompletionConfigDefault instead -# This was the old AIConfig with optional enabled (used as input/default) -# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled -AIConfig = AICompletionConfigDefault +# Note: AIConfig is now the base class for all config types (defined above at line 169) +# For default configs (with optional enabled), use AICompletionConfigDefault instead +# For required configs (with required enabled), use AICompletionConfig instead # Deprecated: Use AIAgentConfigDefault instead LDAIAgentDefaults = AIAgentConfigDefault @@ -360,4 +359,3 @@ class AIAgentConfigRequest: # Deprecated: Use AIAgentConfig instead (note: this was the old return type) LDAIAgent = AIAgentConfig - diff --git a/packages/core/ldai/providers/ai_provider.py b/packages/core/ldai/providers/ai_provider.py index daf56c6..2fec172 100644 --- a/packages/core/ldai/providers/ai_provider.py +++ b/packages/core/ldai/providers/ai_provider.py @@ -10,10 +10,10 @@ class AIProvider(ABC): """ Abstract base class for AI providers that implement chat model functionality. - + This class provides the contract that all provider implementations must follow to integrate with LaunchDarkly's tracking and configuration capabilities. - + Following the AICHAT spec recommendation to use base classes with non-abstract methods for better extensibility and backwards compatibility. """ @@ -21,7 +21,7 @@ class AIProvider(ABC): def __init__(self, logger: Optional[Any] = None): """ Initialize the AI provider. 
- + :param logger: Optional logger for logging provider operations. """ self.logger = logger @@ -29,19 +29,19 @@ def __init__(self, logger: Optional[Any] = None): async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ Invoke the chat model with an array of messages. - + This method should convert messages to provider format, invoke the model, and return a ChatResponse with the result and metrics. - + Default implementation takes no action and returns a placeholder response. Provider implementations should override this method. - + :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ if self.logger: self.logger.warn('invokeModel not implemented by this provider') - + return ChatResponse( message=LDMessage(role='assistant', content=''), metrics=LDAIMetrics(success=False, usage=None), @@ -54,20 +54,20 @@ async def invoke_structured_model( ) -> StructuredResponse: """ Invoke the chat model with structured output support. - + This method should convert messages to provider format, invoke the model with structured output configuration, and return a structured response. - + Default implementation takes no action and returns a placeholder response. Provider implementations should override this method. - + :param messages: Array of LDMessage objects representing the conversation :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data """ if self.logger: self.logger.warn('invokeStructuredModel not implemented by this provider') - + return StructuredResponse( data={}, raw_response='', @@ -79,13 +79,12 @@ async def invoke_structured_model( async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': """ Static method that constructs an instance of the provider. 
- + Each provider implementation must provide their own static create method that accepts an AIConfigKind and returns a configured instance. - + :param ai_config: The LaunchDarkly AI configuration :param logger: Optional logger for the provider :return: Configured provider instance """ raise NotImplementedError('Provider implementations must override the static create method') - diff --git a/packages/core/ldai/providers/ai_provider_factory.py b/packages/core/ldai/providers/ai_provider_factory.py index bd17485..9f5a6e4 100644 --- a/packages/core/ldai/providers/ai_provider_factory.py +++ b/packages/core/ldai/providers/ai_provider_factory.py @@ -6,7 +6,6 @@ from ldai.models import AIConfigKind from ldai.providers.ai_provider import AIProvider - # List of supported AI providers SUPPORTED_AI_PROVIDERS = [ # Multi-provider packages should be last in the list @@ -30,10 +29,10 @@ async def create( ) -> Optional[AIProvider]: """ Create an AIProvider instance based on the AI configuration. - + This method attempts to load provider-specific implementations dynamically. Returns None if the provider is not supported. - + :param ai_config: The AI configuration :param logger: Optional logger for logging provider initialization :param default_ai_provider: Optional default AI provider to use @@ -63,7 +62,7 @@ def _get_providers_to_try( ) -> List[SupportedAIProvider]: """ Determine which providers to try based on default_ai_provider and provider_name. - + :param default_ai_provider: Optional default provider to use :param provider_name: Optional provider name from config :return: List of providers to try in order @@ -84,7 +83,7 @@ def _get_providers_to_try( for provider in multi_provider_packages: provider_set.add(provider) - return list(provider_set) + return list(provider_set) # type: ignore[arg-type] @staticmethod async def _try_create_provider( @@ -94,7 +93,7 @@ async def _try_create_provider( ) -> Optional[AIProvider]: """ Try to create a provider of the specified type. 
- + :param provider_type: Type of provider to create :param ai_config: AI configuration :param logger: Optional logger @@ -138,7 +137,7 @@ async def _create_provider( ) -> Optional[AIProvider]: """ Create a provider instance dynamically. - + :param package_name: Name of the package containing the provider :param provider_class_name: Name of the provider class :param ai_config: AI configuration @@ -166,4 +165,3 @@ async def _create_provider( f"with package {package_name}: {error}" ) return None - diff --git a/packages/core/ldai/providers/types.py b/packages/core/ldai/providers/types.py index 45df755..de54698 100644 --- a/packages/core/ldai/providers/types.py +++ b/packages/core/ldai/providers/types.py @@ -89,4 +89,3 @@ def to_dict(self) -> Dict[str, Any]: if self.error is not None: result['error'] = self.error return result - diff --git a/packages/core/ldai/tracker.py b/packages/core/ldai/tracker.py index 632f0f4..11b846a 100644 --- a/packages/core/ldai/tracker.py +++ b/packages/core/ldai/tracker.py @@ -211,7 +211,7 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None: :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) """ from ldai.providers.types import EvalScore - + # Track each evaluation score individually for metric_key, eval_score in scores.items(): if isinstance(eval_score, EvalScore): @@ -229,12 +229,12 @@ def track_judge_response(self, judge_response: Any) -> None: :param judge_response: JudgeResponse object containing evals and success status """ from ldai.providers.types import JudgeResponse - + if isinstance(judge_response, JudgeResponse): # Track evaluation scores if judge_response.evals: self.track_eval_scores(judge_response.evals) - + # Track success/error based on judge response if judge_response.success: self.track_success() From b94c9f7d588738d91be0f0f29961e7860c8ae405 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 09:41:49 +0100 Subject: [PATCH 20/37] Eiminate unused 
components. --- ldai/__init__.py | 59 --- ldai/chat/__init__.py | 6 - ldai/chat/tracked_chat.py | 186 -------- ldai/client.py | 578 ------------------------ ldai/judge/__init__.py | 7 - ldai/judge/ai_judge.py | 230 ---------- ldai/judge/evaluation_schema_builder.py | 75 --- ldai/models.py | 363 --------------- ldai/providers/__init__.py | 21 - ldai/providers/ai_provider.py | 91 ---- ldai/providers/ai_provider_factory.py | 169 ------- ldai/providers/langchain/__init__.py | 284 ------------ ldai/providers/types.py | 92 ---- ldai/testing/__init__.py | 0 ldai/testing/test_agents.py | 342 -------------- ldai/testing/test_langchain_provider.py | 237 ---------- ldai/testing/test_model_config.py | 330 -------------- ldai/testing/test_tracker.py | 444 ------------------ ldai/tracker.py | 404 ----------------- pyproject.toml | 72 --- 20 files changed, 3990 deletions(-) delete mode 100644 ldai/__init__.py delete mode 100644 ldai/chat/__init__.py delete mode 100644 ldai/chat/tracked_chat.py delete mode 100644 ldai/client.py delete mode 100644 ldai/judge/__init__.py delete mode 100644 ldai/judge/ai_judge.py delete mode 100644 ldai/judge/evaluation_schema_builder.py delete mode 100644 ldai/models.py delete mode 100644 ldai/providers/__init__.py delete mode 100644 ldai/providers/ai_provider.py delete mode 100644 ldai/providers/ai_provider_factory.py delete mode 100644 ldai/providers/langchain/__init__.py delete mode 100644 ldai/providers/types.py delete mode 100644 ldai/testing/__init__.py delete mode 100644 ldai/testing/test_agents.py delete mode 100644 ldai/testing/test_langchain_provider.py delete mode 100644 ldai/testing/test_model_config.py delete mode 100644 ldai/testing/test_tracker.py delete mode 100644 ldai/tracker.py delete mode 100644 pyproject.toml diff --git a/ldai/__init__.py b/ldai/__init__.py deleted file mode 100644 index 617ac3a..0000000 --- a/ldai/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -__version__ = "0.10.1" # x-release-please-version - -# Export main 
client -from ldai.client import LDAIClient - -# Export models for convenience -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, - # Deprecated aliases for backward compatibility - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, -) - -# Export judge -from ldai.judge import AIJudge - -# Export chat -from ldai.chat import TrackedChat - -# Export judge types -from ldai.providers.types import EvalScore, JudgeResponse - -__all__ = [ - 'LDAIClient', - 'AIAgentConfig', - 'AIAgentConfigDefault', - 'AIAgentConfigRequest', - 'AIAgents', - 'AICompletionConfig', - 'AICompletionConfigDefault', - 'AIJudgeConfig', - 'AIJudgeConfigDefault', - 'AIJudge', - 'TrackedChat', - 'EvalScore', - 'JudgeConfiguration', - 'JudgeResponse', - 'LDMessage', - 'ModelConfig', - 'ProviderConfig', - # Deprecated exports - 'AIConfig', - 'LDAIAgent', - 'LDAIAgentConfig', - 'LDAIAgentDefaults', -] diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py deleted file mode 100644 index ffef9ab..0000000 --- a/ldai/chat/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Chat module for LaunchDarkly AI SDK.""" - -from ldai.chat.tracked_chat import TrackedChat - -__all__ = ['TrackedChat'] - diff --git a/ldai/chat/tracked_chat.py b/ldai/chat/tracked_chat.py deleted file mode 100644 index 037fd9a..0000000 --- a/ldai/chat/tracked_chat.py +++ /dev/null @@ -1,186 +0,0 @@ -"""TrackedChat implementation for managing AI chat conversations.""" - -import asyncio -from typing import Any, Dict, List, Optional - -from ldai.models import AICompletionConfig, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, JudgeResponse -from ldai.judge import AIJudge -from ldai.tracker import LDAIConfigTracker - - -class TrackedChat: - """ - Concrete 
implementation of TrackedChat that provides chat functionality - by delegating to an AIProvider implementation. - - This class handles conversation management and tracking, while delegating - the actual model invocation to the provider. - """ - - def __init__( - self, - ai_config: AICompletionConfig, - tracker: LDAIConfigTracker, - provider: AIProvider, - judges: Optional[Dict[str, AIJudge]] = None, - logger: Optional[Any] = None, - ): - """ - Initialize the TrackedChat. - - :param ai_config: The completion AI configuration - :param tracker: The tracker for the completion configuration - :param provider: The AI provider to use for chat - :param judges: Optional dictionary of judge instances keyed by their configuration keys - :param logger: Optional logger for logging - """ - self._ai_config = ai_config - self._tracker = tracker - self._provider = provider - self._judges = judges or {} - self._logger = logger - self._messages: List[LDMessage] = [] - - async def invoke(self, prompt: str) -> ChatResponse: - """ - Invoke the chat model with a prompt string. - - This method handles conversation management and tracking, delegating to the provider's invoke_model method. 
- - :param prompt: The user prompt to send to the chat model - :return: ChatResponse containing the model's response and metrics - """ - # Convert prompt string to LDMessage with role 'user' and add to conversation history - user_message: LDMessage = LDMessage(role='user', content=prompt) - self._messages.append(user_message) - - # Prepend config messages to conversation history for model invocation - config_messages = self._ai_config.messages or [] - all_messages = config_messages + self._messages - - # Delegate to provider-specific implementation with tracking - response = await self._tracker.track_metrics_of( - lambda result: result.metrics, - lambda: self._provider.invoke_model(all_messages), - ) - - # Start judge evaluations as async tasks (don't await them) - judge_config = self._ai_config.judge_configuration - if judge_config and judge_config.judges and len(judge_config.judges) > 0: - evaluation_tasks = self._start_judge_evaluations(self._messages, response) - response.evaluations = evaluation_tasks - - # Add the response message to conversation history - self._messages.append(response.message) - return response - - def _start_judge_evaluations( - self, - messages: List[LDMessage], - response: ChatResponse, - ) -> List[asyncio.Task[Optional[JudgeResponse]]]: - """ - Start judge evaluations as async tasks without awaiting them. - - Returns a list of async tasks that can be awaited later. 
- - :param messages: Array of messages representing the conversation history - :param response: The AI response to be evaluated - :return: List of async tasks that will return judge evaluation results - """ - if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: - return [] - - judge_configs = self._ai_config.judge_configuration.judges - - # Start all judge evaluations as tasks - async def evaluate_judge(judge_config): - judge = self._judges.get(judge_config.key) - if not judge: - if self._logger: - self._logger.warn( - f"Judge configuration is not enabled: {judge_config.key}", - ) - return None - - eval_result = await judge.evaluate_messages( - messages, response, judge_config.sampling_rate - ) - - if eval_result and eval_result.success: - self._tracker.track_eval_scores(eval_result.evals) - - return eval_result - - # Create tasks for each judge evaluation - tasks = [ - asyncio.create_task(evaluate_judge(judge_config)) - for judge_config in judge_configs - ] - - return tasks - - def get_config(self) -> AICompletionConfig: - """ - Get the underlying AI configuration used to initialize this TrackedChat. - - :return: The AI completion configuration - """ - return self._ai_config - - def get_tracker(self) -> LDAIConfigTracker: - """ - Get the underlying AI configuration tracker used to initialize this TrackedChat. - - :return: The tracker instance - """ - return self._tracker - - def get_provider(self) -> AIProvider: - """ - Get the underlying AI provider instance. - - This provides direct access to the provider for advanced use cases. - - :return: The AI provider instance - """ - return self._provider - - def get_judges(self) -> Dict[str, AIJudge]: - """ - Get the judges associated with this TrackedChat. - - Returns a dictionary of judge instances keyed by their configuration keys. 
- - :return: Dictionary of judge instances - """ - return self._judges - - def append_messages(self, messages: List[LDMessage]) -> None: - """ - Append messages to the conversation history. - - Adds messages to the conversation history without invoking the model, - which is useful for managing multi-turn conversations or injecting context. - - :param messages: Array of messages to append to the conversation history - """ - self._messages.extend(messages) - - def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: - """ - Get all messages in the conversation history. - - :param include_config_messages: Whether to include the config messages from the AIConfig. - Defaults to False. - :return: Array of messages. When include_config_messages is True, returns both config - messages and conversation history with config messages prepended. When False, - returns only the conversation history messages. - """ - if include_config_messages: - config_messages = self._ai_config.messages or [] - return config_messages + self._messages - return list(self._messages) - diff --git a/ldai/client.py b/ldai/client.py deleted file mode 100644 index 4f4b6b9..0000000 --- a/ldai/client.py +++ /dev/null @@ -1,578 +0,0 @@ -import logging -from typing import Any, Dict, List, Optional, Tuple - -import chevron -from ldclient import Context -from ldclient.client import LDClient - -from ldai.chat import TrackedChat -from ldai.judge import AIJudge -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, -) -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider -from ldai.tracker import LDAIConfigTracker - - -class LDAIClient: - """The LaunchDarkly AI SDK client object.""" - - def __init__(self, client: LDClient): - self._client = client - 
self._logger = logging.getLogger('ldclient.ai') - - def completion_config( - self, - key: str, - context: Context, - default_value: AICompletionConfigDefault, - variables: Optional[Dict[str, Any]] = None, - ) -> AICompletionConfig: - """ - Get the value of a completion configuration. - - :param key: The key of the completion configuration. - :param context: The context to evaluate the completion configuration in. - :param default_value: The default value of the completion configuration. - :param variables: Additional variables for the completion configuration. - :return: The completion configuration with a tracker used for gathering metrics. - """ - self._client.track('$ld:ai:config:function:single', context, key, 1) - - model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( - key, context, default_value.to_dict(), variables - ) - - config = AICompletionConfig( - enabled=bool(enabled), - model=model, - messages=messages, - provider=provider, - tracker=tracker, - judge_configuration=judge_configuration, - ) - - return config - - def config( - self, - key: str, - context: Context, - default_value: AICompletionConfigDefault, - variables: Optional[Dict[str, Any]] = None, - ) -> AICompletionConfig: - """ - Get the value of a model configuration. - - .. deprecated:: Use :meth:`completion_config` instead. This method will be removed in a future version. - - :param key: The key of the model configuration. - :param context: The context to evaluate the model configuration in. - :param default_value: The default value of the model configuration. - :param variables: Additional variables for the model configuration. - :return: The value of the model configuration along with a tracker used for gathering metrics. 
- """ - return self.completion_config(key, context, default_value, variables) - - def judge_config( - self, - key: str, - context: Context, - default_value: AIJudgeConfigDefault, - variables: Optional[Dict[str, Any]] = None, - ) -> AIJudgeConfig: - """ - Get the value of a judge configuration. - - :param key: The key of the judge configuration. - :param context: The context to evaluate the judge configuration in. - :param default_value: The default value of the judge configuration. - :param variables: Additional variables for the judge configuration. - :return: The judge configuration with a tracker used for gathering metrics. - """ - self._client.track('$ld:ai:judge:function:single', context, key, 1) - - model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( - key, context, default_value.to_dict(), variables - ) - - # Extract evaluation_metric_keys from the variation - variation = self._client.variation(key, context, default_value.to_dict()) - evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) - - config = AIJudgeConfig( - enabled=bool(enabled), - evaluation_metric_keys=evaluation_metric_keys, - model=model, - messages=messages, - provider=provider, - tracker=tracker, - ) - - return config - - async def create_judge( - self, - key: str, - context: Context, - default_value: AIJudgeConfigDefault, - variables: Optional[Dict[str, Any]] = None, - default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[AIJudge]: - """ - Creates and returns a new Judge instance for AI evaluation. - - :param key: The key identifying the AI judge configuration to use - :param context: Standard Context used when evaluating flags - :param default_value: A default value representing a standard AI config result - :param variables: Dictionary of values for instruction interpolation. 
- The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. - :param default_ai_provider: Optional default AI provider to use. - :return: Judge instance or None if disabled/unsupported - - Example:: - - judge = client.create_judge( - "relevance-judge", - context, - AIJudgeConfigDefault( - enabled=True, - model=ModelConfig("gpt-4"), - provider=ProviderConfig("openai"), - evaluation_metric_keys=['$ld:ai:judge:relevance'], - messages=[LDMessage(role='system', content='You are a relevance judge.')] - ), - variables={'metric': "relevance"} - ) - - if judge: - result = await judge.evaluate("User question", "AI response") - if result and result.evals: - relevance_eval = result.evals.get('$ld:ai:judge:relevance') - if relevance_eval: - print('Relevance score:', relevance_eval.score) - """ - self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) - - try: - # Warn if reserved variables are provided - if variables: - if 'message_history' in variables: - # Note: Python doesn't have a logger on the client, but we could add one - pass # Would log warning if logger available - if 'response_to_evaluate' in variables: - pass # Would log warning if logger available - - # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation - extended_variables = dict(variables) if variables else {} - extended_variables['message_history'] = '{{message_history}}' - extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' - - judge_config = self.judge_config(key, context, default_value, extended_variables) - - if not judge_config.enabled or not judge_config.tracker: - # Would log info if logger available - return None - - # Create AI provider for the judge - provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) - if not provider: - return None - - return AIJudge(judge_config, judge_config.tracker, provider, self._logger) - except Exception as error: - 
# Would log error if logger available - return None - - async def _initialize_judges( - self, - judge_configs: List[JudgeConfiguration.Judge], - context: Context, - variables: Optional[Dict[str, Any]] = None, - default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Dict[str, AIJudge]: - """ - Initialize judges from judge configurations. - - :param judge_configs: List of judge configurations - :param context: Standard Context used when evaluating flags - :param variables: Dictionary of values for instruction interpolation - :param default_ai_provider: Optional default AI provider to use - :return: Dictionary of judge instances keyed by their configuration keys - """ - judges: Dict[str, AIJudge] = {} - - async def create_judge_for_config(judge_key: str): - judge = await self.create_judge( - judge_key, - context, - AIJudgeConfigDefault(enabled=False), - variables, - default_ai_provider, - ) - return judge_key, judge - - judge_promises = [ - create_judge_for_config(judge_config.key) - for judge_config in judge_configs - ] - - import asyncio - results = await asyncio.gather(*judge_promises, return_exceptions=True) - - for result in results: - if isinstance(result, Exception): - continue - judge_key, judge = result - if judge: - judges[judge_key] = judge - - return judges - - async def create_chat( - self, - key: str, - context: Context, - default_value: AICompletionConfigDefault, - variables: Optional[Dict[str, Any]] = None, - default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[TrackedChat]: - """ - Creates and returns a new TrackedChat instance for AI chat conversations. 
- - :param key: The key identifying the AI completion configuration to use - :param context: Standard Context used when evaluating flags - :param default_value: A default value representing a standard AI config result - :param variables: Dictionary of values for instruction interpolation - :param default_ai_provider: Optional default AI provider to use - :return: TrackedChat instance or None if disabled/unsupported - - Example:: - - chat = await client.create_chat( - "customer-support-chat", - context, - AICompletionConfigDefault( - enabled=True, - model=ModelConfig("gpt-4"), - provider=ProviderConfig("openai"), - messages=[LDMessage(role='system', content='You are a helpful assistant.')] - ), - variables={'customerName': 'John'} - ) - - if chat: - response = await chat.invoke("I need help with my order") - print(response.message.content) - - # Access conversation history - messages = chat.get_messages() - print(f"Conversation has {len(messages)} messages") - """ - self._client.track('$ld:ai:config:function:createChat', context, key, 1) - if self._logger: - self._logger.debug(f"Creating chat for key: {key}") - config = self.completion_config(key, context, default_value, variables) - - if not config.enabled or not config.tracker: - # Would log info if logger available - return None - - provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) - if not provider: - return None - - judges = {} - if config.judge_configuration and config.judge_configuration.judges: - judges = await self._initialize_judges( - config.judge_configuration.judges, - context, - variables, - default_ai_provider, - ) - - return TrackedChat(config, config.tracker, provider, judges, self._logger) - - def agent_config( - self, - key: str, - context: Context, - default_value: AIAgentConfigDefault, - variables: Optional[Dict[str, Any]] = None, - ) -> AIAgentConfig: - """ - Retrieve a single AI Config agent. 
- - This method retrieves a single agent configuration with instructions - dynamically interpolated using the provided variables and context data. - - Example:: - - agent = client.agent_config( - 'research_agent', - context, - AIAgentConfigDefault( - enabled=True, - model=ModelConfig('gpt-4'), - instructions="You are a research assistant specializing in {{topic}}." - ), - variables={'topic': 'climate change'} - ) - - if agent.enabled: - research_result = agent.instructions # Interpolated instructions - agent.tracker.track_success() - - :param key: The agent configuration key. - :param context: The context to evaluate the agent configuration in. - :param default_value: Default agent values. - :param variables: Variables for interpolation. - :return: Configured AIAgentConfig instance. - """ - # Track single agent usage - self._client.track( - "$ld:ai:agent:function:single", - context, - key, - 1 - ) - - return self.__evaluate_agent(key, context, default_value, variables) - - def agent( - self, - config: AIAgentConfigRequest, - context: Context, - ) -> AIAgentConfig: - """ - Retrieve a single AI Config agent. - - .. deprecated:: Use :meth:`agent_config` instead. This method will be removed in a future version. - - :param config: The agent configuration to use. - :param context: The context to evaluate the agent configuration in. - :return: Configured AIAgentConfig instance. - """ - return self.agent_config(config.key, context, config.default_value, config.variables) - - def agent_configs( - self, - agent_configs: List[AIAgentConfigRequest], - context: Context, - ) -> AIAgents: - """ - Retrieve multiple AI agent configurations. - - This method allows you to retrieve multiple agent configurations in a single call, - with each agent having its own default configuration and variables for instruction - interpolation. 
- - Example:: - - agents = client.agent_configs([ - AIAgentConfigRequest( - key='research_agent', - default_value=AIAgentConfigDefault( - enabled=True, - instructions='You are a research assistant.' - ), - variables={'topic': 'climate change'} - ), - AIAgentConfigRequest( - key='writing_agent', - default_value=AIAgentConfigDefault( - enabled=True, - instructions='You are a writing assistant.' - ), - variables={'style': 'academic'} - ) - ], context) - - research_result = agents["research_agent"].instructions - agents["research_agent"].tracker.track_success() - - :param agent_configs: List of agent configurations to retrieve. - :param context: The context to evaluate the agent configurations in. - :return: Dictionary mapping agent keys to their AIAgentConfig configurations. - """ - # Track multiple agents usage - agent_count = len(agent_configs) - self._client.track( - "$ld:ai:agent:function:multiple", - context, - agent_count, - agent_count - ) - - result: AIAgents = {} - - for config in agent_configs: - agent = self.__evaluate_agent( - config.key, - context, - config.default_value, - config.variables - ) - result[config.key] = agent - - return result - - def agents( - self, - agent_configs: List[AIAgentConfigRequest], - context: Context, - ) -> AIAgents: - """ - Retrieve multiple AI agent configurations. - - .. deprecated:: Use :meth:`agent_configs` instead. This method will be removed in a future version. - - :param agent_configs: List of agent configurations to retrieve. - :param context: The context to evaluate the agent configurations in. - :return: Dictionary mapping agent keys to their AIAgentConfig configurations. 
- """ - return self.agent_configs(agent_configs, context) - - def __evaluate( - self, - key: str, - context: Context, - default_dict: Dict[str, Any], - variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool, Optional[Any]]: - """ - Internal method to evaluate a configuration and extract components. - - :param key: The configuration key. - :param context: The evaluation context. - :param default_dict: Default configuration as dictionary. - :param variables: Variables for interpolation. - :return: Tuple of (model, provider, messages, instructions, tracker, enabled). - """ - variation = self._client.variation(key, context, default_dict) - - all_variables = {} - if variables: - all_variables.update(variables) - all_variables['ldctx'] = context.to_dict() - - # Extract messages - messages = None - if 'messages' in variation and isinstance(variation['messages'], list) and all( - isinstance(entry, dict) for entry in variation['messages'] - ): - messages = [ - LDMessage( - role=entry['role'], - content=self.__interpolate_template( - entry['content'], all_variables - ), - ) - for entry in variation['messages'] - ] - - # Extract instructions - instructions = None - if 'instructions' in variation and isinstance(variation['instructions'], str): - instructions = self.__interpolate_template(variation['instructions'], all_variables) - - # Extract provider config - provider_config = None - if 'provider' in variation and isinstance(variation['provider'], dict): - provider = variation['provider'] - provider_config = ProviderConfig(provider.get('name', '')) - - # Extract model config - model = None - if 'model' in variation and isinstance(variation['model'], dict): - parameters = variation['model'].get('parameters', None) - custom = variation['model'].get('custom', None) - model = ModelConfig( - name=variation['model']['name'], - parameters=parameters, - custom=custom - 
) - - # Create tracker - tracker = LDAIConfigTracker( - self._client, - variation.get('_ldMeta', {}).get('variationKey', ''), - key, - int(variation.get('_ldMeta', {}).get('version', 1)), - model.name if model else '', - provider_config.name if provider_config else '', - context, - ) - - enabled = variation.get('_ldMeta', {}).get('enabled', False) - - # Extract judge configuration - judge_configuration = None - if 'judgeConfiguration' in variation and isinstance(variation['judgeConfiguration'], dict): - judge_config = variation['judgeConfiguration'] - if 'judges' in judge_config and isinstance(judge_config['judges'], list): - judges = [ - JudgeConfiguration.Judge( - key=judge['key'], - sampling_rate=judge['samplingRate'] - ) - for judge in judge_config['judges'] - if isinstance(judge, dict) and 'key' in judge and 'samplingRate' in judge - ] - if judges: - judge_configuration = JudgeConfiguration(judges=judges) - - return model, provider_config, messages, instructions, tracker, enabled, judge_configuration - - def __evaluate_agent( - self, - key: str, - context: Context, - default_value: AIAgentConfigDefault, - variables: Optional[Dict[str, Any]] = None, - ) -> AIAgentConfig: - """ - Internal method to evaluate an agent configuration. - - :param key: The agent configuration key. - :param context: The evaluation context. - :param default_value: Default agent values. - :param variables: Variables for interpolation. - :return: Configured AIAgentConfig instance. 
- """ - model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( - key, context, default_value.to_dict(), variables - ) - - # For agents, prioritize instructions over messages - final_instructions = instructions if instructions is not None else default_value.instructions - - return AIAgentConfig( - enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), - model=model or default_value.model, - provider=provider or default_value.provider, - instructions=final_instructions, - tracker=tracker, - judge_configuration=judge_configuration or default_value.judge_configuration, - ) - - def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str: - """ - Interpolate the template with the given variables using Mustache format. - - :param template: The template string. - :param variables: The variables to interpolate into the template. - :return: The interpolated string. - """ - return chevron.render(template, variables) diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py deleted file mode 100644 index 4ab4df4..0000000 --- a/ldai/judge/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Judge module for LaunchDarkly AI SDK.""" - -from ldai.judge.ai_judge import AIJudge - -__all__ = ['AIJudge'] - - diff --git a/ldai/judge/ai_judge.py b/ldai/judge/ai_judge.py deleted file mode 100644 index 20efbf8..0000000 --- a/ldai/judge/ai_judge.py +++ /dev/null @@ -1,230 +0,0 @@ -"""Judge implementation for AI evaluation.""" - -import random -from typing import Any, Dict, Optional - -import chevron - -from ldai.models import AIJudgeConfig, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse -from ldai.tracker import LDAIConfigTracker -from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder - - -class AIJudge: - """ - Judge implementation that handles evaluation functionality and 
conversation management. - - According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate - other AI Configs using structured output. - """ - - def __init__( - self, - ai_config: AIJudgeConfig, - ai_config_tracker: LDAIConfigTracker, - ai_provider: AIProvider, - logger: Optional[Any] = None, - ): - """ - Initialize the Judge. - - :param ai_config: The judge AI configuration - :param ai_config_tracker: The tracker for the judge configuration - :param ai_provider: The AI provider to use for evaluation - :param logger: Optional logger for logging - """ - self._ai_config = ai_config - self._ai_config_tracker = ai_config_tracker - self._ai_provider = ai_provider - self._logger = logger - self._evaluation_response_structure = EvaluationSchemaBuilder.build( - ai_config.evaluation_metric_keys - ) - - async def evaluate( - self, - input_text: str, - output_text: str, - sampling_rate: float = 1.0, - ) -> Optional[JudgeResponse]: - """ - Evaluates an AI response using the judge's configuration. 
- - :param input_text: The input prompt or question that was provided to the AI - :param output_text: The AI-generated response to be evaluated - :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) - :return: Evaluation results or None if not sampled - """ - try: - if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: - if self._logger: - self._logger.warn( - 'Judge configuration is missing required evaluationMetricKeys' - ) - return None - - if not self._ai_config.messages: - if self._logger: - self._logger.warn('Judge configuration must include messages') - return None - - if random.random() > sampling_rate: - if self._logger: - self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') - return None - - messages = self._construct_evaluation_messages(input_text, output_text) - - # Track metrics of the structured model invocation - response = await self._ai_config_tracker.track_metrics_of( - lambda result: result.metrics, - lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) - ) - - success = response.metrics.success - - evals = self._parse_evaluation_response(response.data) - - if len(evals) != len(self._ai_config.evaluation_metric_keys): - if self._logger: - self._logger.warn('Judge evaluation did not return all evaluations') - success = False - - return JudgeResponse( - evals=evals, - success=success, - ) - except Exception as error: - if self._logger: - self._logger.error(f'Judge evaluation failed: {error}') - return JudgeResponse( - evals={}, - success=False, - error=str(error) if isinstance(error, Exception) else 'Unknown error', - ) - - async def evaluate_messages( - self, - messages: list[LDMessage], - response: ChatResponse, - sampling_ratio: float = 1.0, - ) -> Optional[JudgeResponse]: - """ - Evaluates an AI response from chat messages and response. 
- - :param messages: Array of messages representing the conversation history - :param response: The AI response to be evaluated - :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) - :return: Evaluation results or None if not sampled - """ - input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' - output_text = response.message.content - - return await self.evaluate(input_text, output_text, sampling_ratio) - - def get_ai_config(self) -> AIJudgeConfig: - """ - Returns the AI Config used by this judge. - - :return: The judge AI configuration - """ - return self._ai_config - - def get_tracker(self) -> LDAIConfigTracker: - """ - Returns the tracker associated with this judge. - - :return: The tracker for the judge configuration - """ - return self._ai_config_tracker - - def get_provider(self) -> AIProvider: - """ - Returns the AI provider used by this judge. - - :return: The AI provider - """ - return self._ai_provider - - def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: - """ - Constructs evaluation messages by combining judge's config messages with input/output. - - :param input_text: The input text - :param output_text: The output text to evaluate - :return: List of messages for evaluation - """ - if not self._ai_config.messages: - return [] - - messages: list[LDMessage] = [] - for msg in self._ai_config.messages: - # Interpolate message content with reserved variables - content = self._interpolate_message(msg.content, { - 'message_history': input_text, - 'response_to_evaluate': output_text, - }) - messages.append(LDMessage(role=msg.role, content=content)) - - return messages - - def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: - """ - Interpolates message content with variables using Mustache templating. 
- - :param content: The message content template - :param variables: Variables to interpolate - :return: Interpolated message content - """ - # Use chevron (Mustache) for templating, with no escaping - return chevron.render(content, variables) - - def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: - """ - Parses the structured evaluation response from the AI provider. - - :param data: The structured response data - :return: Dictionary of evaluation scores keyed by metric key - """ - results: Dict[str, EvalScore] = {} - - if not data.get('evaluations') or not isinstance(data['evaluations'], dict): - if self._logger: - self._logger.warn('Invalid response: missing or invalid evaluations object') - return results - - evaluations = data['evaluations'] - - for metric_key in self._ai_config.evaluation_metric_keys: - evaluation = evaluations.get(metric_key) - - if not evaluation or not isinstance(evaluation, dict): - if self._logger: - self._logger.warn(f'Missing evaluation for metric key: {metric_key}') - continue - - score = evaluation.get('score') - reasoning = evaluation.get('reasoning') - - if not isinstance(score, (int, float)) or score < 0 or score > 1: - if self._logger: - self._logger.warn( - f'Invalid score evaluated for {metric_key}: {score}. ' - 'Score must be a number between 0 and 1 inclusive' - ) - continue - - if not isinstance(reasoning, str): - if self._logger: - self._logger.warn( - f'Invalid reasoning evaluated for {metric_key}: {reasoning}. 
' - 'Reasoning must be a string' - ) - continue - - results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) - - return results - diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py deleted file mode 100644 index 1965e64..0000000 --- a/ldai/judge/evaluation_schema_builder.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Internal class for building dynamic evaluation response schemas.""" - -from typing import Any, Dict - - -class EvaluationSchemaBuilder: - """ - Internal class for building dynamic evaluation response schemas. - Not exported - only used internally by Judge. - """ - - @staticmethod - def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: - """ - Build an evaluation response schema from evaluation metric keys. - - :param evaluation_metric_keys: List of evaluation metric keys - :return: Schema dictionary for structured output - """ - return { - 'title': 'EvaluationResponse', - 'description': f"Response containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", - 'type': 'object', - 'properties': { - 'evaluations': { - 'type': 'object', - 'description': f"Object containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", - 'properties': EvaluationSchemaBuilder._build_key_properties(evaluation_metric_keys), - 'required': evaluation_metric_keys, - 'additionalProperties': False, - }, - }, - 'required': ['evaluations'], - 'additionalProperties': False, - } - - @staticmethod - def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: - """ - Build properties for each evaluation metric key. 
- - :param evaluation_metric_keys: List of evaluation metric keys - :return: Dictionary of properties for each key - """ - result: Dict[str, Any] = {} - for key in evaluation_metric_keys: - result[key] = EvaluationSchemaBuilder._build_key_schema(key) - return result - - @staticmethod - def _build_key_schema(key: str) -> Dict[str, Any]: - """ - Build schema for a single evaluation metric key. - - :param key: Evaluation metric key - :return: Schema dictionary for the key - """ - return { - 'type': 'object', - 'properties': { - 'score': { - 'type': 'number', - 'minimum': 0, - 'maximum': 1, - 'description': f'Score between 0.0 and 1.0 for {key}', - }, - 'reasoning': { - 'type': 'string', - 'description': f'Reasoning behind the score for {key}', - }, - }, - 'required': ['score', 'reasoning'], - 'additionalProperties': False, - } - diff --git a/ldai/models.py b/ldai/models.py deleted file mode 100644 index fa36f8c..0000000 --- a/ldai/models.py +++ /dev/null @@ -1,363 +0,0 @@ -import warnings -from dataclasses import dataclass, field -from typing import Any, Dict, List, Literal, Optional, Union - -from ldai.tracker import LDAIConfigTracker - - -@dataclass -class LDMessage: - role: Literal['system', 'user', 'assistant'] - content: str - - def to_dict(self) -> dict: - """ - Render the given message as a dictionary object. - """ - return { - 'role': self.role, - 'content': self.content, - } - - -class ModelConfig: - """ - Configuration related to the model. - """ - - def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): - """ - :param name: The name of the model. - :param parameters: Additional model-specific parameters. - :param custom: Additional customer provided data. - """ - self._name = name - self._parameters = parameters - self._custom = custom - - @property - def name(self) -> str: - """ - The name of the model. 
- """ - return self._name - - def get_parameter(self, key: str) -> Any: - """ - Retrieve model-specific parameters. - - Accessing a named, typed attribute (e.g. name) will result in the call - being delegated to the appropriate property. - """ - if key == 'name': - return self.name - - if self._parameters is None: - return None - - return self._parameters.get(key) - - def get_custom(self, key: str) -> Any: - """ - Retrieve customer provided data. - """ - if self._custom is None: - return None - - return self._custom.get(key) - - def to_dict(self) -> dict: - """ - Render the given model config as a dictionary object. - """ - return { - 'name': self._name, - 'parameters': self._parameters, - 'custom': self._custom, - } - - -class ProviderConfig: - """ - Configuration related to the provider. - """ - - def __init__(self, name: str): - self._name = name - - @property - def name(self) -> str: - """ - The name of the provider. - """ - return self._name - - def to_dict(self) -> dict: - """ - Render the given provider config as a dictionary object. - """ - return { - 'name': self._name, - } - - -# ============================================================================ -# Judge Types -# ============================================================================ - -@dataclass(frozen=True) -class JudgeConfiguration: - """ - Configuration for judge attachment to AI Configs. - """ - - @dataclass(frozen=True) - class Judge: - """ - Configuration for a single judge attachment. - """ - key: str - sampling_rate: float - - def to_dict(self) -> dict: - """ - Render the judge as a dictionary object. - """ - return { - 'key': self.key, - 'samplingRate': self.sampling_rate, - } - - judges: List['JudgeConfiguration.Judge'] - - def to_dict(self) -> dict: - """ - Render the judge configuration as a dictionary object. 
- """ - return { - 'judges': [judge.to_dict() for judge in self.judges], - } - - -# ============================================================================ -# Base AI Config Types -# ============================================================================ - -@dataclass(frozen=True) -class AIConfigDefault: - """ - Base AI Config interface for default implementations with optional enabled property. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - - def _base_to_dict(self) -> Dict[str, Any]: - """ - Render the base config fields as a dictionary object. - """ - return { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - - -@dataclass(frozen=True) -class AIConfig: - """ - Base AI Config interface without mode-specific fields. - """ - enabled: bool - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - tracker: Optional[LDAIConfigTracker] = None - - def _base_to_dict(self) -> Dict[str, Any]: - """ - Render the base config fields as a dictionary object. - """ - return { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - - -# ============================================================================ -# Completion Config Types -# ============================================================================ - -@dataclass(frozen=True) -class AICompletionConfigDefault(AIConfigDefault): - """ - Default Completion AI Config (default mode). - """ - messages: Optional[List[LDMessage]] = None - judge_configuration: Optional[JudgeConfiguration] = None - - def to_dict(self) -> dict: - """ - Render the given default values as an AICompletionConfigDefault-compatible dictionary object. 
- """ - result = self._base_to_dict() - result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result - - -@dataclass(frozen=True) -class AICompletionConfig(AIConfig): - """ - Completion AI Config (default mode). - """ - messages: Optional[List[LDMessage]] = None - judge_configuration: Optional[JudgeConfiguration] = None - - def to_dict(self) -> dict: - """ - Render the given completion config as a dictionary object. - """ - result = self._base_to_dict() - result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result - - -# ============================================================================ -# Agent Config Types -# ============================================================================ - -@dataclass(frozen=True) -class AIAgentConfigDefault(AIConfigDefault): - """ - Default Agent-specific AI Config with instructions. - """ - instructions: Optional[str] = None - judge_configuration: Optional[JudgeConfiguration] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent config default as a dictionary object. - """ - result = self._base_to_dict() - if self.instructions is not None: - result['instructions'] = self.instructions - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result - - -@dataclass(frozen=True) -class AIAgentConfig(AIConfig): - """ - Agent-specific AI Config with instructions. - """ - instructions: Optional[str] = None - judge_configuration: Optional[JudgeConfiguration] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent config as a dictionary object. 
- """ - result = self._base_to_dict() - if self.instructions is not None: - result['instructions'] = self.instructions - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result - - -# ============================================================================ -# Judge Config Types -# ============================================================================ - -@dataclass(frozen=True) -class AIJudgeConfigDefault(AIConfigDefault): - """ - Default Judge-specific AI Config with required evaluation metric key. - """ - messages: Optional[List[LDMessage]] = None - evaluation_metric_keys: Optional[List[str]] = None - - def to_dict(self) -> dict: - """ - Render the given judge config default as a dictionary object. - """ - result = self._base_to_dict() - result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None - if self.evaluation_metric_keys is not None: - result['evaluationMetricKeys'] = self.evaluation_metric_keys - return result - - -@dataclass(frozen=True) -class AIJudgeConfig(AIConfig): - """ - Judge-specific AI Config with required evaluation metric key. - """ - evaluation_metric_keys: List[str] = field(default_factory=list) - messages: Optional[List[LDMessage]] = None - - def to_dict(self) -> dict: - """ - Render the given judge config as a dictionary object. - """ - result = self._base_to_dict() - result['evaluationMetricKeys'] = self.evaluation_metric_keys - result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None - return result - - -# ============================================================================ -# Agent Request Config -# ============================================================================ - -@dataclass -class AIAgentConfigRequest: - """ - Configuration for a single agent request. - - Combines agent key with its specific default configuration and variables. 
- """ - key: str - default_value: AIAgentConfigDefault - variables: Optional[Dict[str, Any]] = None - - -# Type alias for multiple agents -AIAgents = Dict[str, AIAgentConfig] - -# Type alias for all AI Config variants -AIConfigKind = Union[AIAgentConfig, AICompletionConfig, AIJudgeConfig] - - -# ============================================================================ -# Deprecated Type Aliases for Backward Compatibility -# ============================================================================ - -# Note: These are type aliases that point to the new types. -# Since Python uses duck typing, these will work at runtime even if type checkers complain. -# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault -# The old AIConfig return type had required enabled, so it maps to AICompletionConfig - -# Deprecated: Use AICompletionConfigDefault instead -# This was the old AIConfig with optional enabled (used as input/default) -# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled -AIConfig = AICompletionConfigDefault - -# Deprecated: Use AIAgentConfigDefault instead -LDAIAgentDefaults = AIAgentConfigDefault - -# Deprecated: Use AIAgentConfigRequest instead -LDAIAgentConfig = AIAgentConfigRequest - -# Deprecated: Use AIAgentConfig instead (note: this was the old return type) -LDAIAgent = AIAgentConfig - diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py deleted file mode 100644 index 1beffb4..0000000 --- a/ldai/providers/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -"""AI Provider interfaces and factory for LaunchDarkly AI SDK.""" - -from ldai.providers.ai_provider import AIProvider -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider - -# Export LangChain provider if available -try: - from ldai.providers.langchain import LangChainProvider - __all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'LangChainProvider', - 'SupportedAIProvider', - ] -except 
ImportError: - __all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'SupportedAIProvider', - ] - diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py deleted file mode 100644 index daf56c6..0000000 --- a/ldai/providers/ai_provider.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Abstract base class for AI providers.""" - -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Union - -from ldai.models import AIConfigKind, LDMessage -from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse - - -class AIProvider(ABC): - """ - Abstract base class for AI providers that implement chat model functionality. - - This class provides the contract that all provider implementations must follow - to integrate with LaunchDarkly's tracking and configuration capabilities. - - Following the AICHAT spec recommendation to use base classes with non-abstract methods - for better extensibility and backwards compatibility. - """ - - def __init__(self, logger: Optional[Any] = None): - """ - Initialize the AI provider. - - :param logger: Optional logger for logging provider operations. - """ - self.logger = logger - - async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: - """ - Invoke the chat model with an array of messages. - - This method should convert messages to provider format, invoke the model, - and return a ChatResponse with the result and metrics. - - Default implementation takes no action and returns a placeholder response. - Provider implementations should override this method. 
- - :param messages: Array of LDMessage objects representing the conversation - :return: ChatResponse containing the model's response - """ - if self.logger: - self.logger.warn('invokeModel not implemented by this provider') - - return ChatResponse( - message=LDMessage(role='assistant', content=''), - metrics=LDAIMetrics(success=False, usage=None), - ) - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the chat model with structured output support. - - This method should convert messages to provider format, invoke the model with - structured output configuration, and return a structured response. - - Default implementation takes no action and returns a placeholder response. - Provider implementations should override this method. - - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary of output configurations keyed by output name - :return: StructuredResponse containing the structured data - """ - if self.logger: - self.logger.warn('invokeStructuredModel not implemented by this provider') - - return StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics(success=False, usage=None), - ) - - @staticmethod - @abstractmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': - """ - Static method that constructs an instance of the provider. - - Each provider implementation must provide their own static create method - that accepts an AIConfigKind and returns a configured instance. 
- - :param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider - :return: Configured provider instance - """ - raise NotImplementedError('Provider implementations must override the static create method') - diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py deleted file mode 100644 index bd17485..0000000 --- a/ldai/providers/ai_provider_factory.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Factory for creating AIProvider instances based on the provider configuration.""" - -import importlib -from typing import Any, List, Literal, Optional, Type - -from ldai.models import AIConfigKind -from ldai.providers.ai_provider import AIProvider - - -# List of supported AI providers -SUPPORTED_AI_PROVIDERS = [ - # Multi-provider packages should be last in the list - 'langchain', -] - -# Type representing the supported AI providers -SupportedAIProvider = Literal['langchain'] - - -class AIProviderFactory: - """ - Factory for creating AIProvider instances based on the provider configuration. - """ - - @staticmethod - async def create( - ai_config: AIConfigKind, - logger: Optional[Any] = None, - default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[AIProvider]: - """ - Create an AIProvider instance based on the AI configuration. - - This method attempts to load provider-specific implementations dynamically. - Returns None if the provider is not supported. 
- - :param ai_config: The AI configuration - :param logger: Optional logger for logging provider initialization - :param default_ai_provider: Optional default AI provider to use - :return: AIProvider instance or None if not supported - """ - provider_name = ai_config.provider.name.lower() if ai_config.provider else None - # Determine which providers to try based on default_ai_provider - providers_to_try = AIProviderFactory._get_providers_to_try(default_ai_provider, provider_name) - - # Try each provider in order - for provider_type in providers_to_try: - provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) - if provider: - return provider - - # If no provider was successfully created, log a warning - if logger: - logger.warn( - f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" - ) - return None - - @staticmethod - def _get_providers_to_try( - default_ai_provider: Optional[SupportedAIProvider], - provider_name: Optional[str], - ) -> List[SupportedAIProvider]: - """ - Determine which providers to try based on default_ai_provider and provider_name. 
- - :param default_ai_provider: Optional default provider to use - :param provider_name: Optional provider name from config - :return: List of providers to try in order - """ - # If default_ai_provider is set, only try that specific provider - if default_ai_provider: - return [default_ai_provider] - - # If no default_ai_provider is set, try all providers in order - provider_set = set() - - # First try the specific provider if it's supported - if provider_name and provider_name in SUPPORTED_AI_PROVIDERS: - provider_set.add(provider_name) # type: ignore - - # Then try multi-provider packages, but avoid duplicates - multi_provider_packages: List[SupportedAIProvider] = ['langchain'] - for provider in multi_provider_packages: - provider_set.add(provider) - - return list(provider_set) - - @staticmethod - async def _try_create_provider( - provider_type: SupportedAIProvider, - ai_config: AIConfigKind, - logger: Optional[Any] = None, - ) -> Optional[AIProvider]: - """ - Try to create a provider of the specified type. - - :param provider_type: Type of provider to create - :param ai_config: AI configuration - :param logger: Optional logger - :return: AIProvider instance or None if creation failed - """ - # Handle built-in providers (part of this package) - if provider_type == 'langchain': - try: - from ldai.providers.langchain import LangChainProvider - return await LangChainProvider.create(ai_config, logger) - except ImportError as error: - if logger: - logger.warn( - f"Error creating LangChainProvider: {error}. " - f"Make sure langchain and langchain-core packages are installed." 
- ) - return None - - # TODO: REL-10773 OpenAI provider - # TODO: REL-10776 Vercel provider - # For future external providers, use dynamic import - provider_mappings = { - # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), - # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), - } - - if provider_type not in provider_mappings: - return None - - package_name, provider_class_name = provider_mappings[provider_type] - return await AIProviderFactory._create_provider( - package_name, provider_class_name, ai_config, logger - ) - - @staticmethod - async def _create_provider( - package_name: str, - provider_class_name: str, - ai_config: AIConfigKind, - logger: Optional[Any] = None, - ) -> Optional[AIProvider]: - """ - Create a provider instance dynamically. - - :param package_name: Name of the package containing the provider - :param provider_class_name: Name of the provider class - :param ai_config: AI configuration - :param logger: Optional logger - :return: AIProvider instance or None if creation failed - """ - try: - # Try to dynamically import the provider - # This will work if the package is installed - module = importlib.import_module(package_name) - provider_class: Type[AIProvider] = getattr(module, provider_class_name) - - provider = await provider_class.create(ai_config, logger) - if logger: - logger.debug( - f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " - f"with package {package_name}" - ) - return provider - except (ImportError, AttributeError, Exception) as error: - # If the provider is not available or creation fails, return None - if logger: - logger.warn( - f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " - f"with package {package_name}: {error}" - ) - return None - diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py deleted file mode 100644 index f2e2c35..0000000 --- 
a/ldai/providers/langchain/__init__.py +++ /dev/null @@ -1,284 +0,0 @@ -"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" - -from typing import Any, Dict, List, Optional - -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage - -from ldai.models import AIConfigKind, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse -from ldai.tracker import TokenUsage - - -class LangChainProvider(AIProvider): - """ - LangChain implementation of AIProvider. - - This provider integrates LangChain models with LaunchDarkly's tracking capabilities. - """ - - def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): - """ - Initialize the LangChain provider. - - :param llm: LangChain BaseChatModel instance - :param logger: Optional logger for logging provider operations - """ - super().__init__(logger) - self._llm = llm - - # ============================================================================= - # MAIN FACTORY METHOD - # ============================================================================= - - @staticmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': - """ - Static factory method to create a LangChain AIProvider from an AI configuration. 
- - :param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider - :return: Configured LangChainProvider instance - """ - llm = await LangChainProvider.create_langchain_model(ai_config) - return LangChainProvider(llm, logger) - - # ============================================================================= - # INSTANCE METHODS (AIProvider Implementation) - # ============================================================================= - - async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: - """ - Invoke the LangChain model with an array of messages. - - :param messages: Array of LDMessage objects representing the conversation - :return: ChatResponse containing the model's response - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response - response: AIMessage = await self._llm.ainvoke(langchain_messages) - - # Generate metrics early (assumes success by default) - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - # Extract text content from the response - content: str = '' - if isinstance(response.content, str): - content = response.content - else: - # Log warning for non-string content (likely multimodal) - if self.logger: - self.logger.warn( - f"Multimodal response not supported, expecting a string. 
" - f"Content type: {type(response.content)}, Content: {response.content}" - ) - # Update metrics to reflect content loss - metrics.success = False - - # Create the assistant message - from ldai.models import LDMessage - assistant_message = LDMessage(role='assistant', content=content) - - return ChatResponse( - message=assistant_message, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain model invocation failed: {error}') - - from ldai.models import LDMessage - return ChatResponse( - message=LDMessage(role='assistant', content=''), - metrics=LDAIMetrics(success=False, usage=None), - ) - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the LangChain model with structured output support. - - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary of output configurations keyed by output name - :return: StructuredResponse containing the structured data - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response with structured output - # Note: with_structured_output is available on BaseChatModel in newer LangChain versions - if hasattr(self._llm, 'with_structured_output'): - structured_llm = self._llm.with_structured_output(response_structure) - response = await structured_llm.ainvoke(langchain_messages) - else: - # Fallback: invoke normally and try to parse as JSON - response_obj = await self._llm.ainvoke(langchain_messages) - if isinstance(response_obj, AIMessage): - import json - try: - response = json.loads(response_obj.content) - except json.JSONDecodeError: - response = {'content': response_obj.content} - else: - response = response_obj - - # Using structured output doesn't support metrics - metrics = LDAIMetrics( - success=True, - 
usage=TokenUsage(total=0, input=0, output=0), - ) - - import json - return StructuredResponse( - data=response if isinstance(response, dict) else {'result': response}, - raw_response=json.dumps(response) if not isinstance(response, str) else response, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain structured model invocation failed: {error}') - - return StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics( - success=False, - usage=TokenUsage(total=0, input=0, output=0), - ), - ) - - def get_chat_model(self) -> BaseChatModel: - """ - Get the underlying LangChain model instance. - - :return: The LangChain BaseChatModel instance - """ - return self._llm - - # ============================================================================= - # STATIC UTILITY METHODS - # ============================================================================= - - @staticmethod - def map_provider(ld_provider_name: str) -> str: - """ - Map LaunchDarkly provider names to LangChain provider names. - - This method enables seamless integration between LaunchDarkly's standardized - provider naming and LangChain's naming conventions. - - :param ld_provider_name: LaunchDarkly provider name - :return: LangChain provider name - """ - lowercased_name = ld_provider_name.lower() - - mapping: Dict[str, str] = { - 'gemini': 'google-genai', - } - - return mapping.get(lowercased_name, lowercased_name) - - @staticmethod - def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: - """ - Get AI metrics from a LangChain provider response. - - This method extracts token usage information and success status from LangChain responses - and returns a LaunchDarkly LDAIMetrics object. 
- - :param response: The response from the LangChain model - :return: LDAIMetrics with success status and token usage - """ - # Extract token usage if available - usage: Optional[TokenUsage] = None - if hasattr(response, 'response_metadata') and response.response_metadata: - token_usage = response.response_metadata.get('token_usage') - if token_usage: - usage = TokenUsage( - total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, - input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, - output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, - ) - - # LangChain responses that complete successfully are considered successful by default - return LDAIMetrics(success=True, usage=usage) - - @staticmethod - def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: - """ - Convert LaunchDarkly messages to LangChain messages. - - This helper method enables developers to work directly with LangChain message types - while maintaining compatibility with LaunchDarkly's standardized message format. - - :param messages: List of LDMessage objects - :return: List of LangChain message objects - """ - result: List[BaseMessage] = [] - for msg in messages: - if msg.role == 'system': - result.append(SystemMessage(content=msg.content)) - elif msg.role == 'user': - result.append(HumanMessage(content=msg.content)) - elif msg.role == 'assistant': - result.append(AIMessage(content=msg.content)) - else: - raise ValueError(f'Unsupported message role: {msg.role}') - return result - - @staticmethod - async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: - """ - Create a LangChain model from an AI configuration. - - This public helper method enables developers to initialize their own LangChain models - using LaunchDarkly AI configurations. 
- - :param ai_config: The LaunchDarkly AI configuration - :return: A configured LangChain BaseChatModel - """ - model_name = ai_config.model.name if ai_config.model else '' - provider = ai_config.provider.name if ai_config.provider else '' - parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} - if not isinstance(parameters, dict): - parameters = {} - - # Use LangChain's init_chat_model to support multiple providers - # Note: This requires langchain package to be installed - try: - # Try to import init_chat_model from langchain.chat_models - # This is available in langchain >= 0.1.0 - try: - from langchain.chat_models import init_chat_model - except ImportError: - # Fallback for older versions or different import path - from langchain.chat_models.universal import init_chat_model - - # Map provider name - langchain_provider = LangChainProvider.map_provider(provider) - - # Create model configuration - model_kwargs = {**parameters} - if langchain_provider: - model_kwargs['model_provider'] = langchain_provider - - # Initialize the chat model (init_chat_model may be async or sync) - result = init_chat_model(model_name, **model_kwargs) - # Handle both sync and async initialization - if hasattr(result, '__await__'): - return await result - return result - except ImportError as e: - raise ImportError( - 'langchain package is required for LangChainProvider. ' - 'Install it with: pip install langchain langchain-core' - ) from e - diff --git a/ldai/providers/types.py b/ldai/providers/types.py deleted file mode 100644 index 45df755..0000000 --- a/ldai/providers/types.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Types for AI provider responses.""" - -from dataclasses import dataclass -from typing import Any, Dict, List, Optional - -from ldai.models import LDMessage -from ldai.tracker import TokenUsage - - -@dataclass -class LDAIMetrics: - """ - Metrics information for AI operations that includes success status and token usage. 
- """ - success: bool - usage: Optional[TokenUsage] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the metrics as a dictionary object. - """ - result: Dict[str, Any] = { - 'success': self.success, - } - if self.usage is not None: - result['usage'] = { - 'total': self.usage.total, - 'input': self.usage.input, - 'output': self.usage.output, - } - return result - - -@dataclass -class ChatResponse: - """ - Chat response structure. - """ - message: LDMessage - metrics: LDAIMetrics - evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later - - -@dataclass -class StructuredResponse: - """ - Structured response from AI models. - """ - data: Dict[str, Any] - raw_response: str - metrics: LDAIMetrics - - -@dataclass -class EvalScore: - """ - Score and reasoning for a single evaluation metric. - """ - score: float # Score between 0.0 and 1.0 - reasoning: str # Reasoning behind the provided score - - def to_dict(self) -> Dict[str, Any]: - """ - Render the evaluation score as a dictionary object. - """ - return { - 'score': self.score, - 'reasoning': self.reasoning, - } - - -@dataclass -class JudgeResponse: - """ - Response from a judge evaluation containing scores and reasoning for multiple metrics. - """ - evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning - success: bool # Whether the evaluation completed successfully - error: Optional[str] = None # Error message if evaluation failed - - def to_dict(self) -> Dict[str, Any]: - """ - Render the judge response as a dictionary object. 
- """ - result: Dict[str, Any] = { - 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, - 'success': self.success, - } - if self.error is not None: - result['error'] = self.error - return result - diff --git a/ldai/testing/__init__.py b/ldai/testing/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ldai/testing/test_agents.py b/ldai/testing/test_agents.py deleted file mode 100644 index 755f2e5..0000000 --- a/ldai/testing/test_agents.py +++ /dev/null @@ -1,342 +0,0 @@ -import pytest -from ldclient import Config, Context, LDClient -from ldclient.integrations.test_data import TestData - -from ldai import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, ModelConfig, - ProviderConfig) - - -@pytest.fixture -def td() -> TestData: - td = TestData.data_source() - - # Single agent with instructions - td.update( - td.flag('customer-support-agent') - .variations( - { - 'model': {'name': 'gpt-4', 'parameters': {'temperature': 0.3, 'maxTokens': 2048}}, - 'provider': {'name': 'openai'}, - 'instructions': 'You are a helpful customer support agent for {{company_name}}. Always be polite and professional.', - '_ldMeta': {'enabled': True, 'variationKey': 'agent-v1', 'version': 1, 'mode': 'agent'}, - } - ) - .variation_for_all(0) - ) - - # Agent with context interpolation - td.update( - td.flag('personalized-agent') - .variations( - { - 'model': {'name': 'claude-3', 'parameters': {'temperature': 0.5}}, - 'instructions': 'Hello {{ldctx.name}}! I am your personal assistant. Your user key is {{ldctx.key}}.', - '_ldMeta': {'enabled': True, 'variationKey': 'personal-v1', 'version': 2, 'mode': 'agent'}, - } - ) - .variation_for_all(0) - ) - - # Agent with multi-context interpolation - td.update( - td.flag('multi-context-agent') - .variations( - { - 'model': {'name': 'gpt-3.5-turbo'}, - 'instructions': 'Welcome {{ldctx.user.name}} from {{ldctx.org.name}}! 
Your organization tier is {{ldctx.org.tier}}.', - '_ldMeta': {'enabled': True, 'variationKey': 'multi-v1', 'version': 1, 'mode': 'agent'}, - } - ) - .variation_for_all(0) - ) - - # Disabled agent - td.update( - td.flag('disabled-agent') - .variations( - { - 'model': {'name': 'gpt-4'}, - 'instructions': 'This agent is disabled.', - '_ldMeta': {'enabled': False, 'variationKey': 'disabled-v1', 'version': 1, 'mode': 'agent'}, - } - ) - .variation_for_all(0) - ) - - # Agent with minimal metadata - td.update( - td.flag('minimal-agent') - .variations( - { - 'instructions': 'Minimal agent configuration.', - '_ldMeta': {'enabled': True}, - } - ) - .variation_for_all(0) - ) - - # Sales assistant agent - td.update( - td.flag('sales-assistant') - .variations( - { - 'model': {'name': 'gpt-4', 'parameters': {'temperature': 0.7}}, - 'provider': {'name': 'openai'}, - 'instructions': 'You are a sales assistant for {{company_name}}. Help customers find the right products.', - '_ldMeta': {'enabled': True, 'variationKey': 'sales-v1', 'version': 1, 'mode': 'agent'}, - } - ) - .variation_for_all(0) - ) - - # Research agent for testing single agent method - td.update( - td.flag('research-agent') - .variations( - { - 'model': {'name': 'gpt-4', 'parameters': {'temperature': 0.2, 'maxTokens': 3000}}, - 'provider': {'name': 'openai'}, - 'instructions': 'You are a research assistant specializing in {{topic}}. 
Your expertise level should match {{ldctx.expertise}}.', - '_ldMeta': {'enabled': True, 'variationKey': 'research-v1', 'version': 1, 'mode': 'agent'}, - } - ) - .variation_for_all(0) - ) - - return td - - -@pytest.fixture -def client(td: TestData) -> LDClient: - config = Config('sdk-key', update_processor_class=td, send_events=False) - return LDClient(config=config) - - -@pytest.fixture -def ldai_client(client: LDClient) -> LDAIClient: - return LDAIClient(client) - - -def test_single_agent_method(ldai_client: LDAIClient): - """Test the single agent() method functionality.""" - context = Context.builder('user-key').set('expertise', 'advanced').build() - config = LDAIAgentConfig( - key='research-agent', - default_value=LDAIAgentDefaults( - enabled=False, - model=ModelConfig('fallback-model'), - instructions="Default instructions" - ), - variables={'topic': 'quantum computing'} - ) - - agent = ldai_client.agent(config, context) - - assert agent.enabled is True - assert agent.model is not None - assert agent.model.name == 'gpt-4' - assert agent.model.get_parameter('temperature') == 0.2 - assert agent.model.get_parameter('maxTokens') == 3000 - assert agent.provider is not None - assert agent.provider.name == 'openai' - assert agent.instructions == 'You are a research assistant specializing in quantum computing. Your expertise level should match advanced.' - assert agent.tracker is not None - - -def test_single_agent_with_defaults(ldai_client: LDAIClient): - """Test single agent method with non-existent flag using defaults.""" - context = Context.create('user-key') - config = LDAIAgentConfig( - key='non-existent-agent', - default_value=LDAIAgentDefaults( - enabled=True, - model=ModelConfig('default-model', parameters={'temp': 0.8}), - provider=ProviderConfig('default-provider'), - instructions="You are a default assistant for {{task}}." 
- ), - variables={'task': 'general assistance'} - ) - - agent = ldai_client.agent(config, context) - - assert agent.enabled is True - assert agent.model is not None and agent.model.name == 'default-model' - assert agent.model is not None and agent.model.get_parameter('temp') == 0.8 - assert agent.provider is not None and agent.provider.name == 'default-provider' - assert agent.instructions == "You are a default assistant for general assistance." - assert agent.tracker is not None - - -def test_agents_method_with_configs(ldai_client: LDAIClient): - """Test the new agents() method with LDAIAgentConfig objects.""" - context = Context.create('user-key') - - agent_configs = [ - LDAIAgentConfig( - key='customer-support-agent', - default_value=LDAIAgentDefaults( - enabled=False, - model=ModelConfig('fallback-model'), - instructions="Default support" - ), - variables={'company_name': 'Acme Corp'} - ), - LDAIAgentConfig( - key='sales-assistant', - default_value=LDAIAgentDefaults( - enabled=False, - model=ModelConfig('fallback-model'), - instructions="Default sales" - ), - variables={'company_name': 'Acme Corp'} - ) - ] - - agents = ldai_client.agents(agent_configs, context) - - assert len(agents) == 2 - assert 'customer-support-agent' in agents - assert 'sales-assistant' in agents - - support_agent = agents['customer-support-agent'] - assert support_agent.enabled is True - assert support_agent.instructions is not None and 'Acme Corp' in support_agent.instructions - - sales_agent = agents['sales-assistant'] - assert sales_agent.enabled is True - assert sales_agent.instructions is not None and 'Acme Corp' in sales_agent.instructions - assert sales_agent.model is not None and sales_agent.model.get_parameter('temperature') == 0.7 - - -def test_agents_method_different_variables_per_agent(ldai_client: LDAIClient): - """Test agents method with different variables for each agent.""" - context = Context.builder('user-key').name('Alice').build() - - agent_configs = [ - 
LDAIAgentConfig( - key='personalized-agent', - default_value=LDAIAgentDefaults( - enabled=True, - instructions="Default personal" - ), - variables={} # Will use context only - ), - LDAIAgentConfig( - key='customer-support-agent', - default_value=LDAIAgentDefaults( - enabled=True, - instructions="Default support" - ), - variables={'company_name': 'TechStart Inc'} - ) - ] - - agents = ldai_client.agents(agent_configs, context) - - personal_agent = agents['personalized-agent'] - assert personal_agent.instructions == 'Hello Alice! I am your personal assistant. Your user key is user-key.' - - support_agent = agents['customer-support-agent'] - assert support_agent.instructions == 'You are a helpful customer support agent for TechStart Inc. Always be polite and professional.' - - -def test_agents_with_multi_context_interpolation(ldai_client: LDAIClient): - """Test agents method with multi-context interpolation.""" - user_context = Context.builder('user-key').name('Alice').build() - org_context = Context.builder('org-key').kind('org').name('LaunchDarkly').set('tier', 'Enterprise').build() - context = Context.multi_builder().add(user_context).add(org_context).build() - - agent_configs = [ - LDAIAgentConfig( - key='multi-context-agent', - default_value=LDAIAgentDefaults( - enabled=True, - instructions="Default multi-context" - ), - variables={} - ) - ] - - agents = ldai_client.agents(agent_configs, context) - - agent = agents['multi-context-agent'] - assert agent.instructions == 'Welcome Alice from LaunchDarkly! Your organization tier is Enterprise.' 
- - -def test_disabled_agent_single_method(ldai_client: LDAIClient): - """Test that disabled agents are properly handled in single agent method.""" - context = Context.create('user-key') - config = LDAIAgentConfig( - key='disabled-agent', - default_value=LDAIAgentDefaults(enabled=False), - variables={} - ) - - agent = ldai_client.agent(config, context) - - assert agent.enabled is False - assert agent.tracker is not None - - -def test_disabled_agent_multiple_method(ldai_client: LDAIClient): - """Test that disabled agents are properly handled in multiple agents method.""" - context = Context.create('user-key') - - agent_configs = [ - LDAIAgentConfig( - key='disabled-agent', - default_value=LDAIAgentDefaults(enabled=False), - variables={} - ) - ] - - agents = ldai_client.agents(agent_configs, context) - - assert len(agents) == 1 - assert agents['disabled-agent'].enabled is False - - -def test_agent_with_missing_metadata(ldai_client: LDAIClient): - """Test agent handling when metadata is minimal or missing.""" - context = Context.create('user-key') - config = LDAIAgentConfig( - key='minimal-agent', - default_value=LDAIAgentDefaults( - enabled=False, - model=ModelConfig('default-model'), - instructions="Default instructions" - ) - ) - - agent = ldai_client.agent(config, context) - - assert agent.enabled is True # From flag - assert agent.instructions == 'Minimal agent configuration.' 
- assert agent.model == config.default_value.model # Falls back to default - assert agent.tracker is not None - - -def test_agent_config_dataclass(): - """Test the LDAIAgentConfig dataclass functionality.""" - config = LDAIAgentConfig( - key='test-agent', - default_value=LDAIAgentDefaults( - enabled=True, - instructions="Test instructions" - ), - variables={'key': 'value'} - ) - - assert config.key == 'test-agent' - assert config.default_value.enabled is True - assert config.default_value.instructions == "Test instructions" - assert config.variables == {'key': 'value'} - - # Test with no variables - config_no_vars = LDAIAgentConfig( - key='test-agent-2', - default_value=LDAIAgentDefaults(enabled=False) - ) - - assert config_no_vars.key == 'test-agent-2' - assert config_no_vars.variables is None diff --git a/ldai/testing/test_langchain_provider.py b/ldai/testing/test_langchain_provider.py deleted file mode 100644 index 3bb83a1..0000000 --- a/ldai/testing/test_langchain_provider.py +++ /dev/null @@ -1,237 +0,0 @@ -"""Tests for LangChain provider implementation.""" - -import pytest -from unittest.mock import AsyncMock, Mock - -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage - -from ldai.models import LDMessage -from ldai.providers.langchain import LangChainProvider -from ldai.tracker import TokenUsage - - -class TestMessageConversion: - """Test conversion between LD messages and LangChain messages.""" - - def test_convert_multiple_messages(self): - """Test converting a conversation with all message types.""" - ld_messages = [ - LDMessage(role='system', content='You are helpful'), - LDMessage(role='user', content='Hello'), - LDMessage(role='assistant', content='Hi there!'), - ] - lc_messages = LangChainProvider.convert_messages_to_langchain(ld_messages) - - assert len(lc_messages) == 3 - assert isinstance(lc_messages[0], SystemMessage) - assert isinstance(lc_messages[1], HumanMessage) - assert isinstance(lc_messages[2], AIMessage) - assert 
lc_messages[0].content == 'You are helpful' - assert lc_messages[1].content == 'Hello' - assert lc_messages[2].content == 'Hi there!' - - def test_convert_unsupported_role_raises_error(self): - """Test that unsupported message roles raise ValueError.""" - ld_messages = [LDMessage(role='function', content='Function result')] - - with pytest.raises(ValueError, match='Unsupported message role: function'): - LangChainProvider.convert_messages_to_langchain(ld_messages) - - -class TestMetricsExtraction: - """Test metrics extraction from LangChain response metadata.""" - - def test_extract_metrics_with_token_usage(self): - """Test extracting token usage from response metadata.""" - response = AIMessage( - content='Hello, world!', - response_metadata={ - 'token_usage': { - 'total_tokens': 100, - 'prompt_tokens': 60, - 'completion_tokens': 40, - } - } - ) - - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - assert metrics.success is True - assert metrics.usage is not None - assert metrics.usage.total == 100 - assert metrics.usage.input == 60 - assert metrics.usage.output == 40 - - def test_extract_metrics_with_camel_case_token_usage(self): - """Test extracting token usage with camelCase keys (some providers use this).""" - response = AIMessage( - content='Hello, world!', - response_metadata={ - 'token_usage': { - 'totalTokens': 150, - 'promptTokens': 90, - 'completionTokens': 60, - } - } - ) - - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - assert metrics.success is True - assert metrics.usage is not None - assert metrics.usage.total == 150 - assert metrics.usage.input == 90 - assert metrics.usage.output == 60 - - def test_extract_metrics_without_token_usage(self): - """Test metrics extraction when no token usage is available.""" - response = AIMessage(content='Hello, world!') - - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - assert metrics.success is True - assert metrics.usage is None - - -class 
TestInvokeModel: - """Test model invocation with LangChain provider.""" - - @pytest.mark.asyncio - async def test_invoke_model_success(self): - """Test successful model invocation.""" - mock_llm = AsyncMock() - mock_response = AIMessage( - content='Hello, user!', - response_metadata={ - 'token_usage': { - 'total_tokens': 20, - 'prompt_tokens': 10, - 'completion_tokens': 10, - } - } - ) - mock_llm.ainvoke.return_value = mock_response - - provider = LangChainProvider(mock_llm) - messages = [LDMessage(role='user', content='Hello')] - - response = await provider.invoke_model(messages) - - assert response.message.role == 'assistant' - assert response.message.content == 'Hello, user!' - assert response.metrics.success is True - assert response.metrics.usage is not None - assert response.metrics.usage.total == 20 - - @pytest.mark.asyncio - async def test_invoke_model_with_multimodal_content_warning(self): - """Test that non-string content triggers warning and marks as failure.""" - mock_llm = AsyncMock() - mock_response = AIMessage( - content=['text', {'type': 'image'}], # Non-string content - response_metadata={'token_usage': {'total_tokens': 20}} - ) - mock_llm.ainvoke.return_value = mock_response - - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) - messages = [LDMessage(role='user', content='Describe this image')] - - response = await provider.invoke_model(messages) - - # Should warn about multimodal content - mock_logger.warn.assert_called_once() - assert 'Multimodal response not supported' in str(mock_logger.warn.call_args) - - # Should mark as failure - assert response.metrics.success is False - assert response.message.content == '' - - @pytest.mark.asyncio - async def test_invoke_model_with_exception(self): - """Test model invocation handles exceptions gracefully.""" - mock_llm = AsyncMock() - mock_llm.ainvoke.side_effect = Exception('Model API error') - - mock_logger = Mock() - provider = LangChainProvider(mock_llm, 
logger=mock_logger) - messages = [LDMessage(role='user', content='Hello')] - - response = await provider.invoke_model(messages) - - # Should log the error - mock_logger.warn.assert_called_once() - assert 'LangChain model invocation failed' in str(mock_logger.warn.call_args) - - # Should return failure response - assert response.message.role == 'assistant' - assert response.message.content == '' - assert response.metrics.success is False - assert response.metrics.usage is None - - -class TestInvokeStructuredModel: - """Test structured output invocation.""" - - @pytest.mark.asyncio - async def test_invoke_structured_model_with_support(self): - """Test structured output when model supports with_structured_output.""" - mock_llm = Mock() - mock_structured_llm = AsyncMock() - mock_structured_llm.ainvoke.return_value = { - 'answer': 'Paris', - 'confidence': 0.95 - } - mock_llm.with_structured_output.return_value = mock_structured_llm - - provider = LangChainProvider(mock_llm) - messages = [LDMessage(role='user', content='What is the capital of France?')] - schema = {'answer': 'string', 'confidence': 'number'} - - response = await provider.invoke_structured_model(messages, schema) - - assert response.data == {'answer': 'Paris', 'confidence': 0.95} - assert response.metrics.success is True - mock_llm.with_structured_output.assert_called_once_with(schema) - - @pytest.mark.asyncio - async def test_invoke_structured_model_without_support_json_fallback(self): - """Test structured output fallback to JSON parsing when not supported.""" - mock_llm = AsyncMock() - # Model doesn't have with_structured_output - delattr(mock_llm, 'with_structured_output') if hasattr(mock_llm, 'with_structured_output') else None - - mock_response = AIMessage(content='{"answer": "Berlin", "confidence": 0.9}') - mock_llm.ainvoke.return_value = mock_response - - provider = LangChainProvider(mock_llm) - messages = [LDMessage(role='user', content='What is the capital of Germany?')] - schema = {'answer': 
'string', 'confidence': 'number'} - - response = await provider.invoke_structured_model(messages, schema) - - assert response.data == {'answer': 'Berlin', 'confidence': 0.9} - assert response.metrics.success is True - - @pytest.mark.asyncio - async def test_invoke_structured_model_with_exception(self): - """Test structured output handles exceptions gracefully.""" - mock_llm = Mock() - mock_llm.with_structured_output.side_effect = Exception('Structured output error') - - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) - messages = [LDMessage(role='user', content='Question')] - schema = {'answer': 'string'} - - response = await provider.invoke_structured_model(messages, schema) - - # Should log the error - mock_logger.warn.assert_called_once() - assert 'LangChain structured model invocation failed' in str(mock_logger.warn.call_args) - - # Should return failure response - assert response.data == {} - assert response.raw_response == '' - assert response.metrics.success is False - diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py deleted file mode 100644 index d556c10..0000000 --- a/ldai/testing/test_model_config.py +++ /dev/null @@ -1,330 +0,0 @@ -import pytest -from ldclient import Config, Context, LDClient -from ldclient.integrations.test_data import TestData - -from ldai import AICompletionConfigDefault, LDAIClient, LDMessage, ModelConfig - - -@pytest.fixture -def td() -> TestData: - td = TestData.data_source() - td.update( - td.flag('model-config') - .variations( - { - 'model': {'name': 'fakeModel', 'parameters': {'temperature': 0.5, 'maxTokens': 4096}, 'custom': {'extra-attribute': 'value'}}, - 'provider': {'name': 'fakeProvider'}, - 'messages': [{'role': 'system', 'content': 'Hello, {{name}}!'}], - '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, - }, - "green", - ) - .variation_for_all(0) - ) - - td.update( - td.flag('multiple-messages') - .variations( - { - 'model': {'name': 
'fakeModel', 'parameters': {'temperature': 0.7, 'maxTokens': 8192}}, - 'messages': [ - {'role': 'system', 'content': 'Hello, {{name}}!'}, - {'role': 'user', 'content': 'The day is, {{day}}!'}, - ], - '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, - }, - "green", - ) - .variation_for_all(0) - ) - - td.update( - td.flag('ctx-interpolation') - .variations( - { - 'model': {'name': 'fakeModel', 'parameters': {'extra-attribute': 'I can be anything I set my mind/type to'}}, - 'messages': [{'role': 'system', 'content': 'Hello, {{ldctx.name}}! Is your last name {{ldctx.last}}?'}], - '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, - } - ) - .variation_for_all(0) - ) - - td.update( - td.flag('multi-ctx-interpolation') - .variations( - { - 'model': {'name': 'fakeModel', 'parameters': {'extra-attribute': 'I can be anything I set my mind/type to'}}, - 'messages': [{'role': 'system', 'content': 'Hello, {{ldctx.user.name}}! Do you work for {{ldctx.org.shortname}}?'}], - '_ldMeta': {'enabled': True, 'variationKey': 'abcd', 'version': 1}, - } - ) - .variation_for_all(0) - ) - - td.update( - td.flag('off-config') - .variations( - { - 'model': {'name': 'fakeModel', 'parameters': {'temperature': 0.1}}, - 'messages': [{'role': 'system', 'content': 'Hello, {{name}}!'}], - '_ldMeta': {'enabled': False, 'variationKey': 'abcd', 'version': 1}, - } - ) - .variation_for_all(0) - ) - - td.update( - td.flag('initial-config-disabled') - .variations( - { - '_ldMeta': {'enabled': False}, - }, - { - '_ldMeta': {'enabled': True}, - } - ) - .variation_for_all(0) - ) - - td.update( - td.flag('initial-config-enabled') - .variations( - { - '_ldMeta': {'enabled': False}, - }, - { - '_ldMeta': {'enabled': True}, - } - ) - .variation_for_all(1) - ) - - return td - - -@pytest.fixture -def client(td: TestData) -> LDClient: - config = Config('sdk-key', update_processor_class=td, send_events=False) - return LDClient(config=config) - - -@pytest.fixture -def 
ldai_client(client: LDClient) -> LDAIClient: - return LDAIClient(client) - - -def test_model_config_delegates_to_properties(): - model = ModelConfig('fakeModel', parameters={'extra-attribute': 'value'}) - assert model.name == 'fakeModel' - assert model.get_parameter('extra-attribute') == 'value' - assert model.get_parameter('non-existent') is None - - assert model.name == model.get_parameter('name') - - -def test_model_config_handles_custom(): - model = ModelConfig('fakeModel', custom={'extra-attribute': 'value'}) - assert model.name == 'fakeModel' - assert model.get_parameter('extra-attribute') is None - assert model.get_custom('non-existent') is None - assert model.get_custom('name') is None - - -def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault( - enabled=True, - model=ModelConfig('fakeModel', parameters={'temperature': 0.5, 'maxTokens': 4096}), - messages=[LDMessage(role='system', content='Hello, {{name}}!')], - ) - variables = {'name': 'World'} - - config = ldai_client.config('missing-flag', context, default_value, variables) - - assert config.messages is not None - assert len(config.messages) > 0 - assert config.messages[0].content == 'Hello, World!' 
- assert config.enabled is True - - assert config.model is not None - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') == 0.5 - assert config.model.get_parameter('maxTokens') == 4096 - - -def test_model_config_interpolation(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault( - enabled=True, - model=ModelConfig('fakeModel'), - messages=[LDMessage(role='system', content='Hello, {{name}}!')], - ) - variables = {'name': 'World'} - - config = ldai_client.config('model-config', context, default_value, variables) - - assert config.messages is not None - assert len(config.messages) > 0 - assert config.messages[0].content == 'Hello, World!' - assert config.enabled is True - - assert config.model is not None - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') == 0.5 - assert config.model.get_parameter('maxTokens') == 4096 - - -def test_model_config_no_variables(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - - config = ldai_client.config('model-config', context, default_value, {}) - - assert config.messages is not None - assert len(config.messages) > 0 - assert config.messages[0].content == 'Hello, !' 
- assert config.enabled is True - - assert config.model is not None - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') == 0.5 - assert config.model.get_parameter('maxTokens') == 4096 - - -def test_provider_config_handling(ldai_client: LDAIClient): - context = Context.builder('user-key').name("Sandy").build() - default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - variables = {'name': 'World'} - - config = ldai_client.config('model-config', context, default_value, variables) - - assert config.provider is not None - assert config.provider.name == 'fakeProvider' - - -def test_context_interpolation(ldai_client: LDAIClient): - context = Context.builder('user-key').name("Sandy").set('last', 'Beaches').build() - default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - variables = {'name': 'World'} - - config = ldai_client.config( - 'ctx-interpolation', context, default_value, variables - ) - - assert config.messages is not None - assert len(config.messages) > 0 - assert config.messages[0].content == 'Hello, Sandy! Is your last name Beaches?' 
- assert config.enabled is True - - assert config.model is not None - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') is None - assert config.model.get_parameter('maxTokens') is None - assert config.model.get_parameter('extra-attribute') == 'I can be anything I set my mind/type to' - - -def test_multi_context_interpolation(ldai_client: LDAIClient): - user_context = Context.builder('user-key').name("Sandy").build() - org_context = Context.builder('org-key').kind('org').name("LaunchDarkly").set('shortname', 'LD').build() - context = Context.multi_builder().add(user_context).add(org_context).build() - default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - variables = {'name': 'World'} - - config = ldai_client.config( - 'multi-ctx-interpolation', context, default_value, variables - ) - - assert config.messages is not None - assert len(config.messages) > 0 - assert config.messages[0].content == 'Hello, Sandy! Do you work for LD?' - assert config.enabled is True - - assert config.model is not None - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') is None - assert config.model.get_parameter('maxTokens') is None - assert config.model.get_parameter('extra-attribute') == 'I can be anything I set my mind/type to' - - -def test_model_config_multiple(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - variables = {'name': 'World', 'day': 'Monday'} - - config = ldai_client.config( - 'multiple-messages', context, default_value, variables - ) - - assert config.messages is not None - assert len(config.messages) > 0 - assert config.messages[0].content == 'Hello, World!' - assert config.messages[1].content == 'The day is, Monday!' 
- assert config.enabled is True - - assert config.model is not None - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') == 0.7 - assert config.model.get_parameter('maxTokens') == 8192 - - -def test_model_config_disabled(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - - config = ldai_client.config('off-config', context, default_value, {}) - - assert config.model is not None - assert config.enabled is False - assert config.model.name == 'fakeModel' - assert config.model.get_parameter('temperature') == 0.1 - assert config.model.get_parameter('maxTokens') is None - - -def test_model_initial_config_disabled(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - - config = ldai_client.config('initial-config-disabled', context, default_value, {}) - - assert config.enabled is False - assert config.model is None - assert config.messages is None - assert config.provider is None - - -def test_model_initial_config_enabled(ldai_client: LDAIClient): - context = Context.create('user-key') - default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - - config = ldai_client.config('initial-config-enabled', context, default_value, {}) - - assert config.enabled is True - assert config.model is None - assert config.messages is None - assert config.provider is None - - -def test_config_method_tracking(ldai_client: LDAIClient): - from unittest.mock import Mock - - mock_client = Mock() - mock_client.variation.return_value = { - '_ldMeta': {'enabled': True, 'variationKey': 'test-variation', 'version': 1}, - 'model': {'name': 'test-model'}, - 'provider': {'name': 'test-provider'}, - 'messages': [] - } - - client = LDAIClient(mock_client) - context = 
Context.create('user-key') - default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - - config = client.config('test-config-key', context, default_value) - - mock_client.track.assert_called_once_with( - '$ld:ai:config:function:single', - context, - 'test-config-key', - 1 - ) diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py deleted file mode 100644 index 2e39d98..0000000 --- a/ldai/testing/test_tracker.py +++ /dev/null @@ -1,444 +0,0 @@ -from time import sleep -from unittest.mock import MagicMock, call - -import pytest -from ldclient import Config, Context, LDClient -from ldclient.integrations.test_data import TestData - -from ldai.tracker import FeedbackKind, LDAIConfigTracker, TokenUsage - - -@pytest.fixture -def td() -> TestData: - td = TestData.data_source() - td.update( - td.flag("model-config") - .variations( - { - "model": { - "name": "fakeModel", - "parameters": {"temperature": 0.5, "maxTokens": 4096}, - "custom": {"extra-attribute": "value"}, - }, - "provider": {"name": "fakeProvider"}, - "messages": [{"role": "system", "content": "Hello, {{name}}!"}], - "_ldMeta": {"enabled": True, "variationKey": "abcd", "version": 1}, - }, - "green", - ) - .variation_for_all(0) - ) - - return td - - -@pytest.fixture -def client(td: TestData) -> LDClient: - config = Config("sdk-key", update_processor_class=td, send_events=False) - client = LDClient(config=config) - client.track = MagicMock() # type: ignore - return client - - -def test_summary_starts_empty(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 1, "fakeModel", "fakeProvider", context) - - assert tracker.get_summary().duration is None - assert tracker.get_summary().feedback is None - assert tracker.get_summary().success is None - assert tracker.get_summary().usage is None - - -def test_tracks_duration(client: LDClient): - context = Context.create("user-key") - 
tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - tracker.track_duration(100) - - client.track.assert_called_with( # type: ignore - "$ld:ai:duration:total", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 100, - ) - - assert tracker.get_summary().duration == 100 - - -def test_tracks_duration_of(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - tracker.track_duration_of(lambda: sleep(0.01)) - - calls = client.track.mock_calls # type: ignore - - assert len(calls) == 1 - assert calls[0].args[0] == "$ld:ai:duration:total" - assert calls[0].args[1] == context - assert calls[0].args[2] == { - "variationKey": "variation-key", - "configKey": "config-key", - "version": 3, - "modelName": "fakeModel", - "providerName": "fakeProvider", - } - assert calls[0].args[3] == pytest.approx(10, rel=10) - - -def test_tracks_time_to_first_token(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - tracker.track_time_to_first_token(100) - - client.track.assert_called_with( # type: ignore - "$ld:ai:tokens:ttf", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 100, - ) - - assert tracker.get_summary().time_to_first_token == 100 - - -def test_tracks_duration_of_with_exception(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - def sleep_and_throw(): - sleep(0.01) - raise ValueError("Something went wrong") - - try: - tracker.track_duration_of(sleep_and_throw) - assert False, "Should have 
thrown an exception" - except ValueError: - pass - - calls = client.track.mock_calls # type: ignore - - assert len(calls) == 1 - assert calls[0].args[0] == "$ld:ai:duration:total" - assert calls[0].args[1] == context - assert calls[0].args[2] == { - "variationKey": "variation-key", - "configKey": "config-key", - "version": 3, - "modelName": "fakeModel", - "providerName": "fakeProvider", - } - assert calls[0].args[3] == pytest.approx(10, rel=10) - - -def test_tracks_token_usage(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - tokens = TokenUsage(300, 200, 100) - tracker.track_tokens(tokens) - - calls = [ - call( - "$ld:ai:tokens:total", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 300, - ), - call( - "$ld:ai:tokens:input", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 200, - ), - call( - "$ld:ai:tokens:output", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 100, - ), - ] - - client.track.assert_has_calls(calls) # type: ignore - - assert tracker.get_summary().usage == tokens - - -def test_tracks_bedrock_metrics(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - bedrock_result = { - "ResponseMetadata": {"HTTPStatusCode": 200}, - "usage": { - "inputTokens": 220, - "outputTokens": 110, - "totalTokens": 330, - }, - "metrics": { - "latencyMs": 50, - }, - } - tracker.track_bedrock_converse_metrics(bedrock_result) - - calls = [ - call( - "$ld:ai:generation:success", - context, - {"variationKey": "variation-key", "configKey": 
"config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - call( - "$ld:ai:duration:total", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 50, - ), - call( - "$ld:ai:tokens:total", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 330, - ), - call( - "$ld:ai:tokens:input", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 220, - ), - call( - "$ld:ai:tokens:output", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 110, - ), - ] - - client.track.assert_has_calls(calls) # type: ignore - - assert tracker.get_summary().success is True - assert tracker.get_summary().duration == 50 - assert tracker.get_summary().usage == TokenUsage(330, 220, 110) - - -def test_tracks_bedrock_metrics_with_error(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - bedrock_result = { - "ResponseMetadata": {"HTTPStatusCode": 500}, - "usage": { - "totalTokens": 330, - "inputTokens": 220, - "outputTokens": 110, - }, - "metrics": { - "latencyMs": 50, - }, - } - tracker.track_bedrock_converse_metrics(bedrock_result) - - calls = [ - call( - "$ld:ai:generation:error", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - call( - "$ld:ai:duration:total", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 50, - ), - call( - "$ld:ai:tokens:total", - 
context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 330, - ), - call( - "$ld:ai:tokens:input", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 220, - ), - call( - "$ld:ai:tokens:output", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 110, - ), - ] - - client.track.assert_has_calls(calls) # type: ignore - - assert tracker.get_summary().success is False - assert tracker.get_summary().duration == 50 - assert tracker.get_summary().usage == TokenUsage(330, 220, 110) - - -@pytest.mark.asyncio -async def test_tracks_openai_metrics(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - class Result: - def __init__(self): - self.usage = Usage() - - class Usage: - def to_dict(self): - return { - "total_tokens": 330, - "prompt_tokens": 220, - "completion_tokens": 110, - } - - async def get_result(): - return Result() - - await tracker.track_openai_metrics(get_result) - - calls = [ - call( - "$ld:ai:generation:success", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - call( - "$ld:ai:tokens:total", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 330, - ), - call( - "$ld:ai:tokens:input", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 220, - ), - call( - "$ld:ai:tokens:output", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, 
"modelName": "fakeModel", "providerName": "fakeProvider"}, - 110, - ), - ] - - client.track.assert_has_calls(calls, any_order=False) # type: ignore - - assert tracker.get_summary().usage == TokenUsage(330, 220, 110) - - -@pytest.mark.asyncio -async def test_tracks_openai_metrics_with_exception(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - async def raise_exception(): - raise ValueError("Something went wrong") - - try: - await tracker.track_openai_metrics(raise_exception) - assert False, "Should have thrown an exception" - except ValueError: - pass - - calls = [ - call( - "$ld:ai:generation:error", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - ] - - client.track.assert_has_calls(calls, any_order=False) # type: ignore - - assert tracker.get_summary().usage is None - - -@pytest.mark.parametrize( - "kind,label", - [ - pytest.param(FeedbackKind.Positive, "positive", id="positive"), - pytest.param(FeedbackKind.Negative, "negative", id="negative"), - ], -) -def test_tracks_feedback(client: LDClient, kind: FeedbackKind, label: str): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - - tracker.track_feedback({"kind": kind}) - - client.track.assert_called_with( # type: ignore - f"$ld:ai:feedback:user:{label}", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ) - assert tracker.get_summary().feedback == {"kind": kind} - - -def test_tracks_success(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - tracker.track_success() - - 
calls = [ - call( - "$ld:ai:generation:success", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - ] - - client.track.assert_has_calls(calls) # type: ignore - - assert tracker.get_summary().success is True - - -def test_tracks_error(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - tracker.track_error() - - calls = [ - call( - "$ld:ai:generation:error", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - ] - - client.track.assert_has_calls(calls) # type: ignore - - assert tracker.get_summary().success is False - - -def test_error_overwrites_success(client: LDClient): - context = Context.create("user-key") - tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - tracker.track_success() - tracker.track_error() - - calls = [ - call( - "$ld:ai:generation:success", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - call( - "$ld:ai:generation:error", - context, - {"variationKey": "variation-key", "configKey": "config-key", "version": 3, "modelName": "fakeModel", "providerName": "fakeProvider"}, - 1, - ), - ] - - client.track.assert_has_calls(calls) # type: ignore - - assert tracker.get_summary().success is False diff --git a/ldai/tracker.py b/ldai/tracker.py deleted file mode 100644 index 632f0f4..0000000 --- a/ldai/tracker.py +++ /dev/null @@ -1,404 +0,0 @@ -import time -from dataclasses import dataclass -from enum import Enum -from typing import Any, Dict, Optional - -from ldclient import Context, LDClient - - -class FeedbackKind(Enum): - """ - Types of feedback that can be 
provided for AI operations. - """ - - Positive = "positive" - Negative = "negative" - - -@dataclass -class TokenUsage: - """ - Tracks token usage for AI operations. - - :param total: Total number of tokens used. - :param input: Number of tokens in the prompt. - :param output: Number of tokens in the completion. - """ - - total: int - input: int - output: int - - -class LDAIMetricSummary: - """ - Summary of metrics which have been tracked. - """ - - def __init__(self): - self._duration = None - self._success = None - self._feedback = None - self._usage = None - self._time_to_first_token = None - - @property - def duration(self) -> Optional[int]: - return self._duration - - @property - def success(self) -> Optional[bool]: - return self._success - - @property - def feedback(self) -> Optional[Dict[str, FeedbackKind]]: - return self._feedback - - @property - def usage(self) -> Optional[TokenUsage]: - return self._usage - - @property - def time_to_first_token(self) -> Optional[int]: - return self._time_to_first_token - - -class LDAIConfigTracker: - """ - Tracks configuration and usage metrics for LaunchDarkly AI operations. - """ - - def __init__( - self, - ld_client: LDClient, - variation_key: str, - config_key: str, - version: int, - model_name: str, - provider_name: str, - context: Context, - ): - """ - Initialize an AI Config tracker. - - :param ld_client: LaunchDarkly client instance. - :param variation_key: Variation key for tracking. - :param config_key: Configuration key for tracking. - :param version: Version of the variation. - :param model_name: Name of the model used. - :param provider_name: Name of the provider used. - :param context: Context for evaluation. 
- """ - self._ld_client = ld_client - self._variation_key = variation_key - self._config_key = config_key - self._version = version - self._model_name = model_name - self._provider_name = provider_name - self._context = context - self._summary = LDAIMetricSummary() - - def __get_track_data(self): - """ - Get tracking data for events. - - :return: Dictionary containing variation and config keys. - """ - return { - "variationKey": self._variation_key, - "configKey": self._config_key, - "version": self._version, - "modelName": self._model_name, - "providerName": self._provider_name, - } - - def track_duration(self, duration: int) -> None: - """ - Manually track the duration of an AI operation. - - :param duration: Duration in milliseconds. - """ - self._summary._duration = duration - self._ld_client.track( - "$ld:ai:duration:total", self._context, self.__get_track_data(), duration - ) - - def track_time_to_first_token(self, time_to_first_token: int) -> None: - """ - Manually track the time to first token of an AI operation. - - :param time_to_first_token: Time to first token in milliseconds. - """ - self._summary._time_to_first_token = time_to_first_token - self._ld_client.track( - "$ld:ai:tokens:ttf", - self._context, - self.__get_track_data(), - time_to_first_token, - ) - - def track_duration_of(self, func): - """ - Automatically track the duration of an AI operation. - - An exception occurring during the execution of the function will still - track the duration. The exception will be re-thrown. - - :param func: Function to track (synchronous only). - :return: Result of the tracked function. - """ - start_time = time.time() - try: - result = func() - finally: - end_time = time.time() - duration = int((end_time - start_time) * 1000) # duration in milliseconds - self.track_duration(duration) - - return result - - async def track_metrics_of(self, metrics_extractor, func): - """ - Track metrics for a generic AI operation. 
- - This function will track the duration of the operation, extract metrics using the provided - metrics extractor function, and track success or error status accordingly. - - If the provided function throws, then this method will also throw. - In the case the provided function throws, this function will record the duration and an error. - A failed operation will not have any token usage data. - - :param metrics_extractor: Function that extracts LDAIMetrics from the operation result - :param func: Async function which executes the operation - :return: The result of the operation - """ - start_time = time.time() - result = None - try: - result = await func() - except Exception as err: - end_time = time.time() - duration = int((end_time - start_time) * 1000) - self.track_duration(duration) - self.track_error() - raise err - - # Track duration after successful call - end_time = time.time() - duration = int((end_time - start_time) * 1000) - self.track_duration(duration) - - # Extract metrics after successful AI call - from ldai.providers.types import LDAIMetrics - metrics = metrics_extractor(result) - - # Track success/error based on metrics - if metrics.success: - self.track_success() - else: - self.track_error() - - # Track token usage if available - if metrics.usage: - self.track_tokens(metrics.usage) - - return result - - def track_eval_scores(self, scores: Dict[str, Any]) -> None: - """ - Track evaluation scores for multiple metrics. - - :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) - """ - from ldai.providers.types import EvalScore - - # Track each evaluation score individually - for metric_key, eval_score in scores.items(): - if isinstance(eval_score, EvalScore): - self._ld_client.track( - metric_key, - self._context, - self.__get_track_data(), - eval_score.score - ) - - def track_judge_response(self, judge_response: Any) -> None: - """ - Track a judge response, including evaluation scores and success status. 
- - :param judge_response: JudgeResponse object containing evals and success status - """ - from ldai.providers.types import JudgeResponse - - if isinstance(judge_response, JudgeResponse): - # Track evaluation scores - if judge_response.evals: - self.track_eval_scores(judge_response.evals) - - # Track success/error based on judge response - if judge_response.success: - self.track_success() - else: - self.track_error() - - def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: - """ - Track user feedback for an AI operation. - - :param feedback: Dictionary containing feedback kind. - """ - self._summary._feedback = feedback - if feedback["kind"] == FeedbackKind.Positive: - self._ld_client.track( - "$ld:ai:feedback:user:positive", - self._context, - self.__get_track_data(), - 1, - ) - elif feedback["kind"] == FeedbackKind.Negative: - self._ld_client.track( - "$ld:ai:feedback:user:negative", - self._context, - self.__get_track_data(), - 1, - ) - - def track_success(self) -> None: - """ - Track a successful AI generation. - """ - self._summary._success = True - self._ld_client.track( - "$ld:ai:generation:success", self._context, self.__get_track_data(), 1 - ) - - def track_error(self) -> None: - """ - Track an unsuccessful AI generation attempt. - """ - self._summary._success = False - self._ld_client.track( - "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 - ) - - async def track_openai_metrics(self, func): - """ - Track OpenAI-specific operations. - - This function will track the duration of the operation, the token - usage, and the success or error status. - - If the provided function throws, then this method will also throw. - - In the case the provided function throws, this function will record the - duration and an error. - - A failed operation will not have any token usage data. - - :param func: Async function to track. - :return: Result of the tracked function. 
- """ - start_time = time.time() - try: - result = await func() - end_time = time.time() - duration = int((end_time - start_time) * 1000) - self.track_duration(duration) - self.track_success() - if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): - self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) - except Exception: - end_time = time.time() - duration = int((end_time - start_time) * 1000) - self.track_duration(duration) - self.track_error() - raise - - return result - - def track_bedrock_converse_metrics(self, res: dict) -> dict: - """ - Track AWS Bedrock conversation operations. - - - This function will track the duration of the operation, the token - usage, and the success or error status. - - :param res: Response dictionary from Bedrock. - :return: The original response dictionary. - """ - status_code = res.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) - if status_code == 200: - self.track_success() - elif status_code >= 400: - self.track_error() - if res.get("metrics", {}).get("latencyMs"): - self.track_duration(res["metrics"]["latencyMs"]) - if res.get("usage"): - self.track_tokens(_bedrock_to_token_usage(res["usage"])) - return res - - def track_tokens(self, tokens: TokenUsage) -> None: - """ - Track token usage metrics. - - :param tokens: Token usage data from either custom, OpenAI, or Bedrock sources. - """ - self._summary._usage = tokens - if tokens.total > 0: - self._ld_client.track( - "$ld:ai:tokens:total", - self._context, - self.__get_track_data(), - tokens.total, - ) - if tokens.input > 0: - self._ld_client.track( - "$ld:ai:tokens:input", - self._context, - self.__get_track_data(), - tokens.input, - ) - if tokens.output > 0: - self._ld_client.track( - "$ld:ai:tokens:output", - self._context, - self.__get_track_data(), - tokens.output, - ) - - def get_summary(self) -> LDAIMetricSummary: - """ - Get the current summary of AI metrics. - - :return: Summary of AI metrics. 
- """ - return self._summary - - -def _bedrock_to_token_usage(data: dict) -> TokenUsage: - """ - Convert a Bedrock usage dictionary to a TokenUsage object. - - :param data: Dictionary containing Bedrock usage data. - :return: TokenUsage object containing usage data. - """ - return TokenUsage( - total=data.get("totalTokens", 0), - input=data.get("inputTokens", 0), - output=data.get("outputTokens", 0), - ) - - -def _openai_to_token_usage(data: dict) -> TokenUsage: - """ - Convert an OpenAI usage dictionary to a TokenUsage object. - - :param data: Dictionary containing OpenAI usage data. - :return: TokenUsage object containing usage data. - """ - return TokenUsage( - total=data.get("total_tokens", 0), - input=data.get("prompt_tokens", 0), - output=data.get("completion_tokens", 0), - ) diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 9c1f44a..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,72 +0,0 @@ -[tool.poetry] -name = "launchdarkly-server-sdk-ai" -version = "0.10.1" -description = "LaunchDarkly SDK for AI" -authors = ["LaunchDarkly "] -license = "Apache-2.0" -readme = "README.md" -homepage = "https://docs.launchdarkly.com/sdk/ai/python" -repository = "https://github.com/launchdarkly/python-server-sdk-ai" -documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" -classifiers = [ - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", -] -packages = [ { include = "ldai" } ] -exclude = [ - { path = "ldai/testing", format = "wheel" } -] - -[tool.poetry.dependencies] -python = ">=3.9,<4" 
-launchdarkly-server-sdk = ">=9.4.0" -chevron = "=0.14.0" - - -[tool.poetry.group.dev.dependencies] -pytest = ">=2.8" -pytest-cov = ">=2.4.0" -pytest-mypy = "==1.0.1" -pytest-asyncio = ">=0.21.0" -mypy = "==1.18.2" -pycodestyle = "^2.12.1" -isort = ">=5.13.2,<7.0.0" - - -[tool.poetry.group.docs] -optional = true - -[tool.poetry.group.docs.dependencies] -sphinx = ">=6,<8" -sphinx-rtd-theme = ">=1.3,<4.0" -certifi = ">=2018.4.16" -expiringdict = ">=1.1.4" -pyrfc3339 = ">=1.0" -jsonpickle = ">1.4.1" -semver = ">=2.7.9" -urllib3 = ">=1.26.0" -jinja2 = "3.1.6" - -[tool.mypy] -python_version = "3.9" -ignore_missing_imports = true -install_types = true -non_interactive = true - - -[tool.pytest.ini_options] -addopts = ["-ra"] - - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" From 8135577d04e7f5cfa06e7f00d4179b9426a67a89 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 17:49:25 +0100 Subject: [PATCH 21/37] fix ci --- .github/workflows/ci.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd8c2dc..3e7faec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,16 @@ jobs: - name: Install poetry uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 - - uses: ./.github/actions/build + - name: Build core package + uses: ./.github/actions/build + with: + package-path: packages/core + + - name: Build langchain package + uses: ./.github/actions/build + with: + package-path: packages/langchain + - uses: ./.github/actions/build-docs - name: Run tests @@ -58,8 +67,8 @@ jobs: - name: Install poetry uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 - - name: Install requirements - run: poetry install + - name: Install packages + run: make install - name: Run tests run: make test From 1e4606506f1c9a627813148126ec788e9c7690f2 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: 
Wed, 19 Nov 2025 18:51:26 +0100 Subject: [PATCH 22/37] pin to specified version. --- .github/workflows/manual-publish.yml | 7 +++++-- .github/workflows/release-please.yml | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/manual-publish.yml b/.github/workflows/manual-publish.yml index 237c97d..e59739e 100644 --- a/.github/workflows/manual-publish.yml +++ b/.github/workflows/manual-publish.yml @@ -45,7 +45,8 @@ jobs: - name: Publish core package to PyPI if: ${{ inputs.dry_run == false }} - uses: pypa/gh-action-pypi-publish@release/v1 + # https://github.com/pypa/gh-action-pypi-publish/releases/tag/v1.8.13 + uses: pypa/gh-action-pypi-publish@3cc2c35166dfc1e5ea3bb0491ffdeedcaa50d7c with: password: ${{ env.PYPI_AUTH_TOKEN }} packages-dir: packages/core/dist/ @@ -79,7 +80,9 @@ jobs: - name: Publish langchain package to PyPI if: ${{ inputs.dry_run == false }} - uses: pypa/gh-action-pypi-publish@release/v1 + # Pinned to v1.8.13 (2024-06-14) for security + # https://github.com/pypa/gh-action-pypi-publish/releases/tag/v1.8.13 + uses: pypa/gh-action-pypi-publish@3cc2c35166dfc1e5ea3bb0491ffdeedcaa50d7c with: password: ${{ env.PYPI_AUTH_TOKEN }} packages-dir: packages/langchain/dist/ diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 5acd354..8b2e035 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -52,7 +52,8 @@ jobs: - uses: ./.github/actions/build-docs - name: Publish core package to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + # https://github.com/pypa/gh-action-pypi-publish/releases/tag/v1.8.13 + uses: pypa/gh-action-pypi-publish@3cc2c35166dfc1e5ea3bb0491ffdeedcaa50d7c with: password: ${{ env.PYPI_AUTH_TOKEN }} packages-dir: packages/core/dist/ @@ -86,7 +87,9 @@ jobs: package-path: packages/langchain - name: Publish langchain package to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + # Pinned to v1.8.13 (2024-06-14) for 
security + # https://github.com/pypa/gh-action-pypi-publish/releases/tag/v1.8.13 + uses: pypa/gh-action-pypi-publish@3cc2c35166dfc1e5ea3bb0491ffdeedcaa50d7c with: password: ${{ env.PYPI_AUTH_TOKEN }} packages-dir: packages/langchain/dist/ From 57d1d97b0056fbb872a841a548dfdb5c591aa741 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 19:01:22 +0100 Subject: [PATCH 23/37] fix docs --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index eb55373..2c5a82c 100644 --- a/Makefile +++ b/Makefile @@ -76,5 +76,4 @@ build-langchain: #! Build langchain package .PHONY: docs docs: #! Generate sphinx-based documentation @cd packages/core && poetry install --with docs - @cd docs - @cd packages/core && poetry run $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @cd packages/core && poetry run $(SPHINXBUILD) -M html "../../$(SOURCEDIR)" "../../$(BUILDDIR)" $(SPHINXOPTS) $(O) From 2b85ffe434365d300ebb2040bdc2d6d5a9a18078 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 19:07:23 +0100 Subject: [PATCH 24/37] fixes --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3e7faec..fb820a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,9 @@ jobs: - name: Install poetry uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 + - name: Install packages + run: make install + - name: Build core package uses: ./.github/actions/build with: From 22bb6647a3e09822d66b068dbdbfd2acb8143fbb Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 19:30:10 +0100 Subject: [PATCH 25/37] push 0.0.0 to claim --- packages/langchain/pyproject.toml | 14 ++------- packages/langchain/pyproject.toml.backup | 40 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 12 deletions(-) create mode 100644 packages/langchain/pyproject.toml.backup diff --git 
a/packages/langchain/pyproject.toml b/packages/langchain/pyproject.toml index 33d2b3c..b471825 100644 --- a/packages/langchain/pyproject.toml +++ b/packages/langchain/pyproject.toml @@ -1,13 +1,12 @@ [tool.poetry] name = "launchdarkly-server-sdk-ai-langchain" -version = "0.1.0" -description = "LangChain provider for LaunchDarkly AI SDK" +version = "0.0.0" +description = "Placeholder - LaunchDarkly LangChain provider package coming soon" authors = ["LaunchDarkly "] license = "Apache-2.0" readme = "README.md" homepage = "https://docs.launchdarkly.com/sdk/ai/python" repository = "https://github.com/launchdarkly/python-server-sdk-ai" -documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", @@ -25,16 +24,7 @@ packages = [ { include = "ldai" } ] [tool.poetry.dependencies] python = ">=3.9,<4" -launchdarkly-server-sdk-ai = { path = "../core", develop = true } -langchain = ">=0.3.0,<2.0" -langchain-core = ">=0.3.0,<2.0" - -[tool.poetry.group.dev.dependencies] -pytest = ">=2.8" -pytest-cov = ">=2.4.0" -pytest-asyncio = ">=0.21.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" - diff --git a/packages/langchain/pyproject.toml.backup b/packages/langchain/pyproject.toml.backup new file mode 100644 index 0000000..33d2b3c --- /dev/null +++ b/packages/langchain/pyproject.toml.backup @@ -0,0 +1,40 @@ +[tool.poetry] +name = "launchdarkly-server-sdk-ai-langchain" +version = "0.1.0" +description = "LangChain provider for LaunchDarkly AI SDK" +authors = ["LaunchDarkly "] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://docs.launchdarkly.com/sdk/ai/python" +repository = "https://github.com/launchdarkly/python-server-sdk-ai" +documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software 
License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", +] +packages = [ { include = "ldai" } ] + +[tool.poetry.dependencies] +python = ">=3.9,<4" +launchdarkly-server-sdk-ai = { path = "../core", develop = true } +langchain = ">=0.3.0,<2.0" +langchain-core = ">=0.3.0,<2.0" + +[tool.poetry.group.dev.dependencies] +pytest = ">=2.8" +pytest-cov = ">=2.4.0" +pytest-asyncio = ">=0.21.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + From 869b4ebe2376c710b0e9d359d1feb49533778be1 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 20:45:48 +0100 Subject: [PATCH 26/37] bump package --- packages/langchain/pyproject.toml | 14 +++++++-- packages/langchain/pyproject.toml.backup | 40 ------------------------ 2 files changed, 12 insertions(+), 42 deletions(-) delete mode 100644 packages/langchain/pyproject.toml.backup diff --git a/packages/langchain/pyproject.toml b/packages/langchain/pyproject.toml index b471825..33d2b3c 100644 --- a/packages/langchain/pyproject.toml +++ b/packages/langchain/pyproject.toml @@ -1,12 +1,13 @@ [tool.poetry] name = "launchdarkly-server-sdk-ai-langchain" -version = "0.0.0" -description = "Placeholder - LaunchDarkly LangChain provider package coming soon" +version = "0.1.0" +description = "LangChain provider for LaunchDarkly AI SDK" authors = ["LaunchDarkly "] license = "Apache-2.0" readme = "README.md" homepage = "https://docs.launchdarkly.com/sdk/ai/python" repository = "https://github.com/launchdarkly/python-server-sdk-ai" +documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" classifiers = [ "Intended Audience :: Developers", 
"License :: OSI Approved :: Apache Software License", @@ -24,7 +25,16 @@ packages = [ { include = "ldai" } ] [tool.poetry.dependencies] python = ">=3.9,<4" +launchdarkly-server-sdk-ai = { path = "../core", develop = true } +langchain = ">=0.3.0,<2.0" +langchain-core = ">=0.3.0,<2.0" + +[tool.poetry.group.dev.dependencies] +pytest = ">=2.8" +pytest-cov = ">=2.4.0" +pytest-asyncio = ">=0.21.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + diff --git a/packages/langchain/pyproject.toml.backup b/packages/langchain/pyproject.toml.backup deleted file mode 100644 index 33d2b3c..0000000 --- a/packages/langchain/pyproject.toml.backup +++ /dev/null @@ -1,40 +0,0 @@ -[tool.poetry] -name = "launchdarkly-server-sdk-ai-langchain" -version = "0.1.0" -description = "LangChain provider for LaunchDarkly AI SDK" -authors = ["LaunchDarkly "] -license = "Apache-2.0" -readme = "README.md" -homepage = "https://docs.launchdarkly.com/sdk/ai/python" -repository = "https://github.com/launchdarkly/python-server-sdk-ai" -documentation = "https://launchdarkly-python-sdk-ai.readthedocs.io/en/latest/" -classifiers = [ - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", -] -packages = [ { include = "ldai" } ] - -[tool.poetry.dependencies] -python = ">=3.9,<4" -launchdarkly-server-sdk-ai = { path = "../core", develop = true } -langchain = ">=0.3.0,<2.0" -langchain-core = ">=0.3.0,<2.0" - -[tool.poetry.group.dev.dependencies] -pytest = ">=2.8" -pytest-cov = ">=2.4.0" -pytest-asyncio = ">=0.21.0" - -[build-system] -requires = ["poetry-core"] 
-build-backend = "poetry.core.masonry.api" - From 8bd73c9214a85daf087eedc938cb14bd926ed396 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 20:50:26 +0100 Subject: [PATCH 27/37] fix ci --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb820a9..a12d321 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 +43,9 @@ jobs: - uses: ./.github/actions/build-docs + - name: Reinstall packages after build + run: make install + - name: Run tests run: make test From caefa780f73cfbe9bfe377643ee7326820b4b0cd Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 21:27:15 +0100 Subject: [PATCH 28/37] move provenance to root level --- PROVENANCE.md | 46 ++++++++++++++++++++++++++++--------- packages/core/PROVENANCE.md | 45 ------------------------------------ release-please-config.json | 3 ++- 3 files changed, 37 insertions(+), 57 deletions(-) delete mode 100644 packages/core/PROVENANCE.md diff --git a/PROVENANCE.md b/PROVENANCE.md index 73d317c..4a20571 100644 --- a/PROVENANCE.md +++ b/PROVENANCE.md @@ -4,38 +4,62 @@ LaunchDarkly uses the [SLSA framework](https://slsa.dev/spec/v1.0/about) (Supply As part of [SLSA requirements for level 3 compliance](https://slsa.dev/spec/v1.0/requirements), LaunchDarkly publishes provenance about our SDK package builds using [GitHub's generic SLSA3 provenance generator](https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/generic/README.md#generation-of-slsa3-provenance-for-arbitrary-projects) for distribution alongside our packages. These attestations are available for download from the GitHub release page for the release version under Assets > `multiple.intoto.jsonl`. -To verify SLSA provenance attestations, we recommend using [slsa-verifier](https://github.com/slsa-framework/slsa-verifier). 
Example usage for verifying a package is included below: +To verify SLSA provenance attestations, we recommend using [slsa-verifier](https://github.com/slsa-framework/slsa-verifier). Example usage for verifying packages is included below. + +### Verifying the Core Package -``` -# Set the version of the library to verify -VERSION=0.10.1 +```bash +# Set the version of the core package to verify +CORE_VERSION=0.10.1 ``` +```bash +# Download package from PyPI +$ pip download --only-binary=:all: launchdarkly-server-sdk-ai==${CORE_VERSION} + +# Download provenance from GitHub release into same directory +$ curl --location -O \ + https://github.com/launchdarkly/python-server-sdk-ai/releases/download/core-${CORE_VERSION}/multiple.intoto.jsonl + +# Run slsa-verifier to verify provenance against package artifacts +$ slsa-verifier verify-artifact \ +--provenance-path multiple.intoto.jsonl \ +--source-uri github.com/launchdarkly/python-server-sdk-ai \ +launchdarkly_server_sdk_ai-${CORE_VERSION}-py3-none-any.whl ``` -# Download package from PyPi -$ pip download --only-binary=:all: launchdarkly-server-sdk-ai==${VERSION} -# Download provenance from Github release into same directory +### Verifying the LangChain Package + +```bash +# Set the version of the langchain package to verify +LANGCHAIN_VERSION=0.1.0 + +# Download package from PyPI +$ pip download --only-binary=:all: launchdarkly-server-sdk-ai-langchain==${LANGCHAIN_VERSION} + +# Download provenance from GitHub release into same directory $ curl --location -O \ - https://github.com/launchdarkly/python-server-sdk-ai/releases/download/${VERSION}/multiple.intoto.jsonl + https://github.com/launchdarkly/python-server-sdk-ai/releases/download/langchain-${LANGCHAIN_VERSION}/multiple.intoto.jsonl # Run slsa-verifier to verify provenance against package artifacts $ slsa-verifier verify-artifact \ --provenance-path multiple.intoto.jsonl \ --source-uri github.com/launchdarkly/python-server-sdk-ai \ 
-launchdarkly_server_sdk_ai-${VERSION}-py3-none-any.whl +launchdarkly_server_sdk_ai_langchain-${LANGCHAIN_VERSION}-py3-none-any.whl ``` -Below is a sample of expected output. +### Expected Output + +Below is a sample of expected output for successful verification: ``` Verified signature against tlog entry index 150910243 at URL: https://rekor.sigstore.dev/api/v1/log/entries/108e9186e8c5677ab3f14fc82cd3deb769e07ef812cadda623c08c77d4e51fc03124ee7542c470a1 Verified build using builder "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@refs/tags/v2.0.0" at commit 8e2d4094b4833d075e70dfce43bbc7176008c4a1 -Verifying artifact launchdarkly_server_sdk_ai-0.3.0-py3-none-any.whl: PASSED +Verifying artifact launchdarkly_server_sdk_ai-0.10.1-py3-none-any.whl: PASSED PASSED: SLSA verification passed ``` diff --git a/packages/core/PROVENANCE.md b/packages/core/PROVENANCE.md deleted file mode 100644 index 73d317c..0000000 --- a/packages/core/PROVENANCE.md +++ /dev/null @@ -1,45 +0,0 @@ -## Verifying SDK build provenance with the SLSA framework - -LaunchDarkly uses the [SLSA framework](https://slsa.dev/spec/v1.0/about) (Supply-chain Levels for Software Artifacts) to help developers make their supply chain more secure by ensuring the authenticity and build integrity of our published SDK packages. - -As part of [SLSA requirements for level 3 compliance](https://slsa.dev/spec/v1.0/requirements), LaunchDarkly publishes provenance about our SDK package builds using [GitHub's generic SLSA3 provenance generator](https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/generic/README.md#generation-of-slsa3-provenance-for-arbitrary-projects) for distribution alongside our packages. These attestations are available for download from the GitHub release page for the release version under Assets > `multiple.intoto.jsonl`. 
- -To verify SLSA provenance attestations, we recommend using [slsa-verifier](https://github.com/slsa-framework/slsa-verifier). Example usage for verifying a package is included below: - - - -``` -# Set the version of the library to verify -VERSION=0.10.1 -``` - - - -``` -# Download package from PyPi -$ pip download --only-binary=:all: launchdarkly-server-sdk-ai==${VERSION} - -# Download provenance from Github release into same directory -$ curl --location -O \ - https://github.com/launchdarkly/python-server-sdk-ai/releases/download/${VERSION}/multiple.intoto.jsonl - -# Run slsa-verifier to verify provenance against package artifacts -$ slsa-verifier verify-artifact \ ---provenance-path multiple.intoto.jsonl \ ---source-uri github.com/launchdarkly/python-server-sdk-ai \ -launchdarkly_server_sdk_ai-${VERSION}-py3-none-any.whl -``` - -Below is a sample of expected output. - -``` -Verified signature against tlog entry index 150910243 at URL: https://rekor.sigstore.dev/api/v1/log/entries/108e9186e8c5677ab3f14fc82cd3deb769e07ef812cadda623c08c77d4e51fc03124ee7542c470a1 -Verified build using builder "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@refs/tags/v2.0.0" at commit 8e2d4094b4833d075e70dfce43bbc7176008c4a1 -Verifying artifact launchdarkly_server_sdk_ai-0.3.0-py3-none-any.whl: PASSED - -PASSED: SLSA verification passed -``` - -Alternatively, to verify the provenance manually, the SLSA framework specifies [recommendations for verifying build artifacts](https://slsa.dev/spec/v1.0/verifying-artifacts) in their documentation. - -**Note:** These instructions do not apply when building our libraries from source. 
diff --git a/release-please-config.json b/release-please-config.json index 1de9de1..583bb8f 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -7,7 +7,7 @@ "versioning": "default", "bump-minor-pre-major": true, "include-v-in-tag": false, - "extra-files": ["packages/core/ldai/__init__.py", "packages/core/PROVENANCE.md"], + "extra-files": ["packages/core/ldai/__init__.py", "PROVENANCE.md"], "include-component-in-tag": true, "component": "core" }, @@ -17,6 +17,7 @@ "versioning": "default", "bump-minor-pre-major": true, "include-v-in-tag": false, + "extra-files": ["PROVENANCE.md"], "include-component-in-tag": true, "component": "langchain" } From 7792cacea612a0b5b3bf320dd738300d7ce07246 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 22:04:58 +0100 Subject: [PATCH 29/37] fixes --- README.md | 160 ++++++++++++++---------------------------------------- 1 file changed, 41 insertions(+), 119 deletions(-) diff --git a/README.md b/README.md index ea5b04e..8a983f3 100644 --- a/README.md +++ b/README.md @@ -1,130 +1,52 @@ -# LaunchDarkly Server-side AI SDK for Python - Monorepo +# LaunchDarkly Server-side AI library for Python -This repository contains the LaunchDarkly AI SDK for Python and its provider packages. +> **Note:** This repository is a monorepo containing multiple packages. See the [Packages](#packages) section below. + +## LaunchDarkly overview + +[LaunchDarkly](https://www.launchdarkly.com) is a feature management platform that serves trillions of feature flags daily to help teams build better software, faster. [Get started](https://docs.launchdarkly.com/home/getting-started) using LaunchDarkly today! + +[![Twitter Follow](https://img.shields.io/twitter/follow/launchdarkly.svg?style=social&label=Follow&maxAge=2592000)](https://twitter.com/intent/follow?screen_name=launchdarkly) + +## Supported Python versions + +This version of the library has a minimum Python version of 3.9. 
## Packages -### Core SDK -**Package:** [`launchdarkly-server-sdk-ai`](./packages/core/) -**PyPI:** https://pypi.org/project/launchdarkly-server-sdk-ai/ - -The core LaunchDarkly AI SDK providing: -- AI configuration management -- Tracking and metrics -- Provider abstraction layer -- Chat management - -```bash -pip install launchdarkly-server-sdk-ai -``` - -### LangChain Provider -**Package:** [`launchdarkly-server-sdk-ai-langchain`](./packages/langchain/) -**PyPI:** https://pypi.org/project/launchdarkly-server-sdk-ai-langchain/ - -LangChain provider supporting multiple AI providers through LangChain's unified interface. - -```bash -pip install launchdarkly-server-sdk-ai-langchain -``` - -## Installation - -### Basic Installation -```bash -# Install core SDK -pip install launchdarkly-server-sdk-ai - -# Install with LangChain provider -pip install launchdarkly-server-sdk-ai-langchain -``` - -### Development Installation -```bash -# Clone the repository -git clone https://github.com/launchdarkly/python-server-sdk-ai.git -cd python-server-sdk-ai - -# Install core package -cd packages/core -poetry install - -# Install langchain package (in separate terminal/session) -cd packages/langchain -poetry install -``` - -## Usage - -```python -from ldclient import init, Context -from ldai import init_ai - -# Initialize -ld_client = init('your-sdk-key') -ai_client = init_ai(ld_client) - -# Create a chat (automatically uses installed providers) -context = Context.create('user-key') -chat = await ai_client.create_chat('chat-config', context) - -if chat: - response = await chat.invoke('Hello!') - print(response.message.content) -``` - -## Documentation - -- [SDK Reference Guide](https://docs.launchdarkly.com/sdk/ai/python) -- [API Documentation](https://launchdarkly-python-sdk-ai.readthedocs.io/) -- [Core Package README](./packages/core/README.md) -- [LangChain Provider README](./packages/langchain/README.md) - -## Repository Structure - -``` -python-server-sdk-ai/ -├── packages/ -│ 
├── core/ # Core SDK -│ │ ├── ldai/ # Main SDK code -│ │ ├── pyproject.toml -│ │ └── README.md -│ └── langchain/ # LangChain provider -│ ├── ldai/ -│ │ └── providers/ -│ │ └── langchain/ -│ ├── pyproject.toml -│ └── README.md -├── .github/ -│ └── workflows/ # CI/CD workflows -├── release-please-config.json # Multi-package release config -└── .release-please-manifest.json # Version tracking -``` - -## Publishing - -Each package is published independently to PyPI: -- Core: `launchdarkly-server-sdk-ai` -- LangChain: `launchdarkly-server-sdk-ai-langchain` - -Releases are managed automatically via Release Please when changes are merged to `main`. +This repository contains the following packages: -## Contributing +- **[`launchdarkly-server-sdk-ai`](./packages/core/)** - Core LaunchDarkly AI SDK +- **[`launchdarkly-server-sdk-ai-langchain`](./packages/langchain/)** - LangChain provider integration -See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. +Refer to each package's README for specific installation and usage instructions. + +## Getting started + +Refer to the [SDK reference guide](https://docs.launchdarkly.com/sdk/ai/python) for instructions on getting started with using the SDK. + +## Learn more + +Read our [documentation](http://docs.launchdarkly.com) for in-depth instructions on configuring and using LaunchDarkly. You can also head straight to the [reference guide for the python SDK](http://docs.launchdarkly.com/docs/python-sdk-ai-reference). + +## Contributing -## Testing +We encourage pull requests and other contributions from the community. Check out our [contributing guidelines](CONTRIBUTING.md) for instructions on how to contribute to this library. 
-```bash -# Test core package -cd packages/core -poetry run pytest +## Verifying library build provenance with the SLSA framework -# Test langchain package -cd packages/langchain -poetry run pytest -``` +LaunchDarkly uses the [SLSA framework](https://slsa.dev/spec/v1.0/about) (Supply-chain Levels for Software Artifacts) to help developers make their supply chain more secure by ensuring the authenticity and build integrity of our published library packages. To learn more, see the [provenance guide](PROVENANCE.md). -## License +## About LaunchDarkly -Apache-2.0. See [LICENSE.txt](LICENSE.txt) +- LaunchDarkly is a continuous delivery platform that provides feature flags as a service and allows developers to iterate quickly and safely. We allow you to easily flag your features and manage them from the LaunchDarkly dashboard. With LaunchDarkly, you can: + - Roll out a new feature to a subset of your users (like a group of users who opt-in to a beta tester group), gathering feedback and bug reports from real-world use cases. + - Gradually roll out a feature to an increasing percentage of users, and track the effect that the feature has on key metrics (for instance, how likely is a user to complete a purchase if they have feature A versus feature B?). + - Turn off a feature that you realize is causing performance problems in production, without needing to re-deploy, or even restart the application with a changed configuration file. + - Grant access to certain features based on user attributes, like payment plan (eg: users on the 'gold' plan get access to more features than users in the 'silver' plan). Disable parts of your application to facilitate maintenance, without taking everything offline. +- LaunchDarkly provides feature flag SDKs for a wide variety of languages and technologies. Read [our documentation](https://docs.launchdarkly.com/sdk) for a complete list. 
+- Explore LaunchDarkly + - [launchdarkly.com](https://www.launchdarkly.com/ "LaunchDarkly Main Website") for more information + - [docs.launchdarkly.com](https://docs.launchdarkly.com/ "LaunchDarkly Documentation") for our documentation and SDK reference guides + - [apidocs.launchdarkly.com](https://apidocs.launchdarkly.com/ "LaunchDarkly API Documentation") for our API documentation + - [blog.launchdarkly.com](https://blog.launchdarkly.com/ "LaunchDarkly Blog Documentation") for the latest product updates From e5728a93061dac9a65fb9d4403f2f72abfa23b7f Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 22:19:52 +0100 Subject: [PATCH 30/37] seperate langchain provider to its own file instead of init --- .../ldai/providers/langchain/__init__.py | 284 +----------------- .../providers/langchain/langchain_provider.py | 272 +++++++++++++++++ 2 files changed, 275 insertions(+), 281 deletions(-) create mode 100644 packages/langchain/ldai/providers/langchain/langchain_provider.py diff --git a/packages/langchain/ldai/providers/langchain/__init__.py b/packages/langchain/ldai/providers/langchain/__init__.py index f2e2c35..2e1a27a 100644 --- a/packages/langchain/ldai/providers/langchain/__init__.py +++ b/packages/langchain/ldai/providers/langchain/__init__.py @@ -1,284 +1,6 @@ -"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" +"""LangChain provider module for LaunchDarkly AI SDK.""" -from typing import Any, Dict, List, Optional +from ldai.providers.langchain.langchain_provider import LangChainProvider -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage - -from ldai.models import AIConfigKind, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse -from ldai.tracker import TokenUsage - - -class LangChainProvider(AIProvider): - """ - LangChain 
implementation of AIProvider. - - This provider integrates LangChain models with LaunchDarkly's tracking capabilities. - """ - - def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): - """ - Initialize the LangChain provider. - - :param llm: LangChain BaseChatModel instance - :param logger: Optional logger for logging provider operations - """ - super().__init__(logger) - self._llm = llm - - # ============================================================================= - # MAIN FACTORY METHOD - # ============================================================================= - - @staticmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': - """ - Static factory method to create a LangChain AIProvider from an AI configuration. - - :param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider - :return: Configured LangChainProvider instance - """ - llm = await LangChainProvider.create_langchain_model(ai_config) - return LangChainProvider(llm, logger) - - # ============================================================================= - # INSTANCE METHODS (AIProvider Implementation) - # ============================================================================= - - async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: - """ - Invoke the LangChain model with an array of messages. 
- - :param messages: Array of LDMessage objects representing the conversation - :return: ChatResponse containing the model's response - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response - response: AIMessage = await self._llm.ainvoke(langchain_messages) - - # Generate metrics early (assumes success by default) - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - # Extract text content from the response - content: str = '' - if isinstance(response.content, str): - content = response.content - else: - # Log warning for non-string content (likely multimodal) - if self.logger: - self.logger.warn( - f"Multimodal response not supported, expecting a string. " - f"Content type: {type(response.content)}, Content: {response.content}" - ) - # Update metrics to reflect content loss - metrics.success = False - - # Create the assistant message - from ldai.models import LDMessage - assistant_message = LDMessage(role='assistant', content=content) - - return ChatResponse( - message=assistant_message, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain model invocation failed: {error}') - - from ldai.models import LDMessage - return ChatResponse( - message=LDMessage(role='assistant', content=''), - metrics=LDAIMetrics(success=False, usage=None), - ) - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the LangChain model with structured output support. 
- - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary of output configurations keyed by output name - :return: StructuredResponse containing the structured data - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response with structured output - # Note: with_structured_output is available on BaseChatModel in newer LangChain versions - if hasattr(self._llm, 'with_structured_output'): - structured_llm = self._llm.with_structured_output(response_structure) - response = await structured_llm.ainvoke(langchain_messages) - else: - # Fallback: invoke normally and try to parse as JSON - response_obj = await self._llm.ainvoke(langchain_messages) - if isinstance(response_obj, AIMessage): - import json - try: - response = json.loads(response_obj.content) - except json.JSONDecodeError: - response = {'content': response_obj.content} - else: - response = response_obj - - # Using structured output doesn't support metrics - metrics = LDAIMetrics( - success=True, - usage=TokenUsage(total=0, input=0, output=0), - ) - - import json - return StructuredResponse( - data=response if isinstance(response, dict) else {'result': response}, - raw_response=json.dumps(response) if not isinstance(response, str) else response, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain structured model invocation failed: {error}') - - return StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics( - success=False, - usage=TokenUsage(total=0, input=0, output=0), - ), - ) - - def get_chat_model(self) -> BaseChatModel: - """ - Get the underlying LangChain model instance. 
- - :return: The LangChain BaseChatModel instance - """ - return self._llm - - # ============================================================================= - # STATIC UTILITY METHODS - # ============================================================================= - - @staticmethod - def map_provider(ld_provider_name: str) -> str: - """ - Map LaunchDarkly provider names to LangChain provider names. - - This method enables seamless integration between LaunchDarkly's standardized - provider naming and LangChain's naming conventions. - - :param ld_provider_name: LaunchDarkly provider name - :return: LangChain provider name - """ - lowercased_name = ld_provider_name.lower() - - mapping: Dict[str, str] = { - 'gemini': 'google-genai', - } - - return mapping.get(lowercased_name, lowercased_name) - - @staticmethod - def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: - """ - Get AI metrics from a LangChain provider response. - - This method extracts token usage information and success status from LangChain responses - and returns a LaunchDarkly LDAIMetrics object. 
- - :param response: The response from the LangChain model - :return: LDAIMetrics with success status and token usage - """ - # Extract token usage if available - usage: Optional[TokenUsage] = None - if hasattr(response, 'response_metadata') and response.response_metadata: - token_usage = response.response_metadata.get('token_usage') - if token_usage: - usage = TokenUsage( - total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, - input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, - output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, - ) - - # LangChain responses that complete successfully are considered successful by default - return LDAIMetrics(success=True, usage=usage) - - @staticmethod - def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: - """ - Convert LaunchDarkly messages to LangChain messages. - - This helper method enables developers to work directly with LangChain message types - while maintaining compatibility with LaunchDarkly's standardized message format. - - :param messages: List of LDMessage objects - :return: List of LangChain message objects - """ - result: List[BaseMessage] = [] - for msg in messages: - if msg.role == 'system': - result.append(SystemMessage(content=msg.content)) - elif msg.role == 'user': - result.append(HumanMessage(content=msg.content)) - elif msg.role == 'assistant': - result.append(AIMessage(content=msg.content)) - else: - raise ValueError(f'Unsupported message role: {msg.role}') - return result - - @staticmethod - async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: - """ - Create a LangChain model from an AI configuration. - - This public helper method enables developers to initialize their own LangChain models - using LaunchDarkly AI configurations. 
- - :param ai_config: The LaunchDarkly AI configuration - :return: A configured LangChain BaseChatModel - """ - model_name = ai_config.model.name if ai_config.model else '' - provider = ai_config.provider.name if ai_config.provider else '' - parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} - if not isinstance(parameters, dict): - parameters = {} - - # Use LangChain's init_chat_model to support multiple providers - # Note: This requires langchain package to be installed - try: - # Try to import init_chat_model from langchain.chat_models - # This is available in langchain >= 0.1.0 - try: - from langchain.chat_models import init_chat_model - except ImportError: - # Fallback for older versions or different import path - from langchain.chat_models.universal import init_chat_model - - # Map provider name - langchain_provider = LangChainProvider.map_provider(provider) - - # Create model configuration - model_kwargs = {**parameters} - if langchain_provider: - model_kwargs['model_provider'] = langchain_provider - - # Initialize the chat model (init_chat_model may be async or sync) - result = init_chat_model(model_name, **model_kwargs) - # Handle both sync and async initialization - if hasattr(result, '__await__'): - return await result - return result - except ImportError as e: - raise ImportError( - 'langchain package is required for LangChainProvider. 
' - 'Install it with: pip install langchain langchain-core' - ) from e +__all__ = ['LangChainProvider'] diff --git a/packages/langchain/ldai/providers/langchain/langchain_provider.py b/packages/langchain/ldai/providers/langchain/langchain_provider.py new file mode 100644 index 0000000..ece7fbf --- /dev/null +++ b/packages/langchain/ldai/providers/langchain/langchain_provider.py @@ -0,0 +1,272 @@ +"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" + +from typing import Any, Dict, List, Optional + +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse +from ldai.tracker import TokenUsage + + +class LangChainProvider(AIProvider): + """ + LangChain implementation of AIProvider. + + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. + """ + + def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): + """ + Initialize the LangChain provider. + + :param llm: LangChain BaseChatModel instance + :param logger: Optional logger for logging provider operations + """ + super().__init__(logger) + self._llm = llm + + @staticmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': + """ + Static factory method to create a LangChain AIProvider from an AI configuration. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured LangChainProvider instance + """ + llm = await LangChainProvider.create_langchain_model(ai_config) + return LangChainProvider(llm, logger) + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the LangChain model with an array of messages. 
+ + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response + response: AIMessage = await self._llm.ainvoke(langchain_messages) + + # Generate metrics early (assumes success by default) + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + # Extract text content from the response + content: str = '' + if isinstance(response.content, str): + content = response.content + else: + # Log warning for non-string content (likely multimodal) + if self.logger: + self.logger.warn( + f"Multimodal response not supported, expecting a string. " + f"Content type: {type(response.content)}, Content: {response.content}" + ) + # Update metrics to reflect content loss + metrics.success = False + + # Create the assistant message + from ldai.models import LDMessage + assistant_message = LDMessage(role='assistant', content=content) + + return ChatResponse( + message=assistant_message, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain model invocation failed: {error}') + + from ldai.models import LDMessage + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the LangChain model with structured output support. 
+ + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response with structured output + # Note: with_structured_output is available on BaseChatModel in newer LangChain versions + if hasattr(self._llm, 'with_structured_output'): + structured_llm = self._llm.with_structured_output(response_structure) + response = await structured_llm.ainvoke(langchain_messages) + else: + # Fallback: invoke normally and try to parse as JSON + response_obj = await self._llm.ainvoke(langchain_messages) + if isinstance(response_obj, AIMessage): + import json + try: + response = json.loads(response_obj.content) + except json.JSONDecodeError: + response = {'content': response_obj.content} + else: + response = response_obj + + # Using structured output doesn't support metrics + metrics = LDAIMetrics( + success=True, + usage=TokenUsage(total=0, input=0, output=0), + ) + + import json + return StructuredResponse( + data=response if isinstance(response, dict) else {'result': response}, + raw_response=json.dumps(response) if not isinstance(response, str) else response, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain structured model invocation failed: {error}') + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics( + success=False, + usage=TokenUsage(total=0, input=0, output=0), + ), + ) + + def get_chat_model(self) -> BaseChatModel: + """ + Get the underlying LangChain model instance. 
+ + :return: The LangChain BaseChatModel instance + """ + return self._llm + + @staticmethod + def map_provider(ld_provider_name: str) -> str: + """ + Map LaunchDarkly provider names to LangChain provider names. + + This method enables seamless integration between LaunchDarkly's standardized + provider naming and LangChain's naming conventions. + + :param ld_provider_name: LaunchDarkly provider name + :return: LangChain provider name + """ + lowercased_name = ld_provider_name.lower() + + mapping: Dict[str, str] = { + 'gemini': 'google-genai', + } + + return mapping.get(lowercased_name, lowercased_name) + + @staticmethod + def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: + """ + Get AI metrics from a LangChain provider response. + + This method extracts token usage information and success status from LangChain responses + and returns a LaunchDarkly LDAIMetrics object. + + :param response: The response from the LangChain model + :return: LDAIMetrics with success status and token usage + """ + # Extract token usage if available + usage: Optional[TokenUsage] = None + if hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get('token_usage') + if token_usage: + usage = TokenUsage( + total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, + input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, + output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, + ) + + # LangChain responses that complete successfully are considered successful by default + return LDAIMetrics(success=True, usage=usage) + + @staticmethod + def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: + """ + Convert LaunchDarkly messages to LangChain messages. 
+ + This helper method enables developers to work directly with LangChain message types + while maintaining compatibility with LaunchDarkly's standardized message format. + + :param messages: List of LDMessage objects + :return: List of LangChain message objects + """ + result: List[BaseMessage] = [] + for msg in messages: + if msg.role == 'system': + result.append(SystemMessage(content=msg.content)) + elif msg.role == 'user': + result.append(HumanMessage(content=msg.content)) + elif msg.role == 'assistant': + result.append(AIMessage(content=msg.content)) + else: + raise ValueError(f'Unsupported message role: {msg.role}') + return result + + @staticmethod + async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: + """ + Create a LangChain model from an AI configuration. + + This public helper method enables developers to initialize their own LangChain models + using LaunchDarkly AI configurations. + + :param ai_config: The LaunchDarkly AI configuration + :return: A configured LangChain BaseChatModel + """ + model_name = ai_config.model.name if ai_config.model else '' + provider = ai_config.provider.name if ai_config.provider else '' + parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} + if not isinstance(parameters, dict): + parameters = {} + + # Use LangChain's init_chat_model to support multiple providers + # Note: This requires langchain package to be installed + try: + # Try to import init_chat_model from langchain.chat_models + # This is available in langchain >= 0.1.0 + try: + from langchain.chat_models import init_chat_model + except ImportError: + # Fallback for older versions or different import path + from langchain.chat_models.universal import init_chat_model + + # Map provider name + langchain_provider = LangChainProvider.map_provider(provider) + + # Create model configuration + model_kwargs = {**parameters} + if langchain_provider: + model_kwargs['model_provider'] = langchain_provider + + # 
Initialize the chat model (init_chat_model may be async or sync) + result = init_chat_model(model_name, **model_kwargs) + # Handle both sync and async initialization + if hasattr(result, '__await__'): + return await result + return result + except ImportError as e: + raise ImportError( + 'langchain package is required for LangChainProvider. ' + 'Install it with: pip install langchain langchain-core' + ) from e + From 754cdab8d56e7e3d8f1268e085f36e09013a6a2f Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 22:41:43 +0100 Subject: [PATCH 31/37] logger should not be optional --- ldai/testing/test_langchain_provider.py | 237 ++++++++++++++++++ packages/core/ldai/chat/tracked_chat.py | 14 +- packages/core/ldai/client.py | 26 +- packages/core/ldai/judge/ai_judge.py | 58 ++--- .../ldai/judge/evaluation_schema_builder.py | 2 +- packages/core/ldai/providers/ai_provider.py | 20 +- .../ldai/providers/ai_provider_factory.py | 53 ++-- .../providers/langchain/langchain_provider.py | 37 ++- .../tests/test_langchain_provider.py | 23 +- 9 files changed, 333 insertions(+), 137 deletions(-) create mode 100644 ldai/testing/test_langchain_provider.py diff --git a/ldai/testing/test_langchain_provider.py b/ldai/testing/test_langchain_provider.py new file mode 100644 index 0000000..3bb83a1 --- /dev/null +++ b/ldai/testing/test_langchain_provider.py @@ -0,0 +1,237 @@ +"""Tests for LangChain provider implementation.""" + +import pytest +from unittest.mock import AsyncMock, Mock + +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + +from ldai.models import LDMessage +from ldai.providers.langchain import LangChainProvider +from ldai.tracker import TokenUsage + + +class TestMessageConversion: + """Test conversion between LD messages and LangChain messages.""" + + def test_convert_multiple_messages(self): + """Test converting a conversation with all message types.""" + ld_messages = [ + LDMessage(role='system', content='You are helpful'), + 
LDMessage(role='user', content='Hello'), + LDMessage(role='assistant', content='Hi there!'), + ] + lc_messages = LangChainProvider.convert_messages_to_langchain(ld_messages) + + assert len(lc_messages) == 3 + assert isinstance(lc_messages[0], SystemMessage) + assert isinstance(lc_messages[1], HumanMessage) + assert isinstance(lc_messages[2], AIMessage) + assert lc_messages[0].content == 'You are helpful' + assert lc_messages[1].content == 'Hello' + assert lc_messages[2].content == 'Hi there!' + + def test_convert_unsupported_role_raises_error(self): + """Test that unsupported message roles raise ValueError.""" + ld_messages = [LDMessage(role='function', content='Function result')] + + with pytest.raises(ValueError, match='Unsupported message role: function'): + LangChainProvider.convert_messages_to_langchain(ld_messages) + + +class TestMetricsExtraction: + """Test metrics extraction from LangChain response metadata.""" + + def test_extract_metrics_with_token_usage(self): + """Test extracting token usage from response metadata.""" + response = AIMessage( + content='Hello, world!', + response_metadata={ + 'token_usage': { + 'total_tokens': 100, + 'prompt_tokens': 60, + 'completion_tokens': 40, + } + } + ) + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is not None + assert metrics.usage.total == 100 + assert metrics.usage.input == 60 + assert metrics.usage.output == 40 + + def test_extract_metrics_with_camel_case_token_usage(self): + """Test extracting token usage with camelCase keys (some providers use this).""" + response = AIMessage( + content='Hello, world!', + response_metadata={ + 'token_usage': { + 'totalTokens': 150, + 'promptTokens': 90, + 'completionTokens': 60, + } + } + ) + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is not None + assert metrics.usage.total == 150 + assert metrics.usage.input 
== 90 + assert metrics.usage.output == 60 + + def test_extract_metrics_without_token_usage(self): + """Test metrics extraction when no token usage is available.""" + response = AIMessage(content='Hello, world!') + + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + assert metrics.success is True + assert metrics.usage is None + + +class TestInvokeModel: + """Test model invocation with LangChain provider.""" + + @pytest.mark.asyncio + async def test_invoke_model_success(self): + """Test successful model invocation.""" + mock_llm = AsyncMock() + mock_response = AIMessage( + content='Hello, user!', + response_metadata={ + 'token_usage': { + 'total_tokens': 20, + 'prompt_tokens': 10, + 'completion_tokens': 10, + } + } + ) + mock_llm.ainvoke.return_value = mock_response + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='Hello')] + + response = await provider.invoke_model(messages) + + assert response.message.role == 'assistant' + assert response.message.content == 'Hello, user!' 
+ assert response.metrics.success is True + assert response.metrics.usage is not None + assert response.metrics.usage.total == 20 + + @pytest.mark.asyncio + async def test_invoke_model_with_multimodal_content_warning(self): + """Test that non-string content triggers warning and marks as failure.""" + mock_llm = AsyncMock() + mock_response = AIMessage( + content=['text', {'type': 'image'}], # Non-string content + response_metadata={'token_usage': {'total_tokens': 20}} + ) + mock_llm.ainvoke.return_value = mock_response + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Describe this image')] + + response = await provider.invoke_model(messages) + + # Should warn about multimodal content + mock_logger.warn.assert_called_once() + assert 'Multimodal response not supported' in str(mock_logger.warn.call_args) + + # Should mark as failure + assert response.metrics.success is False + assert response.message.content == '' + + @pytest.mark.asyncio + async def test_invoke_model_with_exception(self): + """Test model invocation handles exceptions gracefully.""" + mock_llm = AsyncMock() + mock_llm.ainvoke.side_effect = Exception('Model API error') + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Hello')] + + response = await provider.invoke_model(messages) + + # Should log the error + mock_logger.warn.assert_called_once() + assert 'LangChain model invocation failed' in str(mock_logger.warn.call_args) + + # Should return failure response + assert response.message.role == 'assistant' + assert response.message.content == '' + assert response.metrics.success is False + assert response.metrics.usage is None + + +class TestInvokeStructuredModel: + """Test structured output invocation.""" + + @pytest.mark.asyncio + async def test_invoke_structured_model_with_support(self): + """Test structured output when model supports 
with_structured_output.""" + mock_llm = Mock() + mock_structured_llm = AsyncMock() + mock_structured_llm.ainvoke.return_value = { + 'answer': 'Paris', + 'confidence': 0.95 + } + mock_llm.with_structured_output.return_value = mock_structured_llm + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='What is the capital of France?')] + schema = {'answer': 'string', 'confidence': 'number'} + + response = await provider.invoke_structured_model(messages, schema) + + assert response.data == {'answer': 'Paris', 'confidence': 0.95} + assert response.metrics.success is True + mock_llm.with_structured_output.assert_called_once_with(schema) + + @pytest.mark.asyncio + async def test_invoke_structured_model_without_support_json_fallback(self): + """Test structured output fallback to JSON parsing when not supported.""" + mock_llm = AsyncMock() + # Model doesn't have with_structured_output + delattr(mock_llm, 'with_structured_output') if hasattr(mock_llm, 'with_structured_output') else None + + mock_response = AIMessage(content='{"answer": "Berlin", "confidence": 0.9}') + mock_llm.ainvoke.return_value = mock_response + + provider = LangChainProvider(mock_llm) + messages = [LDMessage(role='user', content='What is the capital of Germany?')] + schema = {'answer': 'string', 'confidence': 'number'} + + response = await provider.invoke_structured_model(messages, schema) + + assert response.data == {'answer': 'Berlin', 'confidence': 0.9} + assert response.metrics.success is True + + @pytest.mark.asyncio + async def test_invoke_structured_model_with_exception(self): + """Test structured output handles exceptions gracefully.""" + mock_llm = Mock() + mock_llm.with_structured_output.side_effect = Exception('Structured output error') + + mock_logger = Mock() + provider = LangChainProvider(mock_llm, logger=mock_logger) + messages = [LDMessage(role='user', content='Question')] + schema = {'answer': 'string'} + + response = await 
provider.invoke_structured_model(messages, schema) + + # Should log the error + mock_logger.warn.assert_called_once() + assert 'LangChain structured model invocation failed' in str(mock_logger.warn.call_args) + + # Should return failure response + assert response.data == {} + assert response.raw_response == '' + assert response.metrics.success is False + diff --git a/packages/core/ldai/chat/tracked_chat.py b/packages/core/ldai/chat/tracked_chat.py index 0fc9873..e7bd8f3 100644 --- a/packages/core/ldai/chat/tracked_chat.py +++ b/packages/core/ldai/chat/tracked_chat.py @@ -1,7 +1,8 @@ """TrackedChat implementation for managing AI chat conversations.""" import asyncio -from typing import Any, Dict, List, Optional +import logging +from typing import Dict, List, Optional from ldai.judge import AIJudge from ldai.models import AICompletionConfig, LDMessage @@ -25,7 +26,6 @@ def __init__( tracker: LDAIConfigTracker, provider: AIProvider, judges: Optional[Dict[str, AIJudge]] = None, - logger: Optional[Any] = None, ): """ Initialize the TrackedChat. 
@@ -34,13 +34,12 @@ def __init__( :param tracker: The tracker for the completion configuration :param provider: The AI provider to use for chat :param judges: Optional dictionary of judge instances keyed by their configuration keys - :param logger: Optional logger for logging """ self._ai_config = ai_config self._tracker = tracker self._provider = provider self._judges = judges or {} - self._logger = logger + self._logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}') self._messages: List[LDMessage] = [] async def invoke(self, prompt: str) -> ChatResponse: @@ -99,10 +98,9 @@ def _start_judge_evaluations( async def evaluate_judge(judge_config): judge = self._judges.get(judge_config.key) if not judge: - if self._logger: - self._logger.warn( - f"Judge configuration is not enabled: {judge_config.key}", - ) + self._logger.warning( + f"Judge configuration is not enabled: {judge_config.key}", + ) return None eval_result = await judge.evaluate_messages( diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index 086e99b..a30f796 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -158,31 +158,34 @@ async def create_judge( self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) try: + # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + extended_variables = dict(variables) if variables else {} + # Warn if reserved variables are provided if variables: if 'message_history' in variables: - # Note: Python doesn't have a logger on the client, but we could add one - pass # Would log warning if logger available + self._logger.warning( + 'Variable "message_history" is reserved for judge evaluation and will be overwritten' + ) if 'response_to_evaluate' in variables: - pass # Would log warning if logger available - - # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation - extended_variables = dict(variables) if variables else {} + 
self._logger.warning( + 'Variable "response_to_evaluate" is reserved for judge evaluation and will be overwritten' + ) + extended_variables['message_history'] = '{{message_history}}' extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' judge_config = self.judge_config(key, context, default_value, extended_variables) if not judge_config.enabled or not judge_config.tracker: - # Would log info if logger available return None # Create AI provider for the judge - provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) + provider = await AIProviderFactory.create(judge_config, default_ai_provider) if not provider: return None - return AIJudge(judge_config, judge_config.tracker, provider, self._logger) + return AIJudge(judge_config, judge_config.tracker, provider) except Exception as error: # Would log error if logger available return None @@ -278,10 +281,9 @@ async def create_chat( config = self.completion_config(key, context, default_value, variables) if not config.enabled or not config.tracker: - # Would log info if logger available return None - provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) + provider = await AIProviderFactory.create(config, default_ai_provider) if not provider: return None @@ -294,7 +296,7 @@ async def create_chat( default_ai_provider, ) - return TrackedChat(config, config.tracker, provider, judges, self._logger) + return TrackedChat(config, config.tracker, provider, judges) def agent_config( self, diff --git a/packages/core/ldai/judge/ai_judge.py b/packages/core/ldai/judge/ai_judge.py index 3caad65..7b1f3ea 100644 --- a/packages/core/ldai/judge/ai_judge.py +++ b/packages/core/ldai/judge/ai_judge.py @@ -1,7 +1,8 @@ """Judge implementation for AI evaluation.""" +import logging import random -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import chevron @@ -26,7 +27,6 @@ def __init__( ai_config: AIJudgeConfig, 
ai_config_tracker: LDAIConfigTracker, ai_provider: AIProvider, - logger: Optional[Any] = None, ): """ Initialize the Judge. @@ -34,12 +34,11 @@ def __init__( :param ai_config: The judge AI configuration :param ai_config_tracker: The tracker for the judge configuration :param ai_provider: The AI provider to use for evaluation - :param logger: Optional logger for logging """ self._ai_config = ai_config self._ai_config_tracker = ai_config_tracker self._ai_provider = ai_provider - self._logger = logger + self._logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}') self._evaluation_response_structure = EvaluationSchemaBuilder.build( ai_config.evaluation_metric_keys ) @@ -60,20 +59,17 @@ async def evaluate( """ try: if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: - if self._logger: - self._logger.warn( - 'Judge configuration is missing required evaluationMetricKeys' - ) + self._logger.warning( + 'Judge configuration is missing required evaluationMetricKeys' + ) return None if not self._ai_config.messages: - if self._logger: - self._logger.warn('Judge configuration must include messages') + self._logger.warning('Judge configuration must include messages') return None if random.random() > sampling_rate: - if self._logger: - self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') return None messages = self._construct_evaluation_messages(input_text, output_text) @@ -89,8 +85,7 @@ async def evaluate( evals = self._parse_evaluation_response(response.data) if len(evals) != len(self._ai_config.evaluation_metric_keys): - if self._logger: - self._logger.warn('Judge evaluation did not return all evaluations') + self._logger.warning('Judge evaluation did not return all evaluations') success = False return JudgeResponse( @@ -98,17 +93,16 @@ async def evaluate( success=success, ) except Exception as 
error: - if self._logger: - self._logger.error(f'Judge evaluation failed: {error}') + self._logger.error(f'Judge evaluation failed: {error}') return JudgeResponse( evals={}, success=False, - error=str(error) if isinstance(error, Exception) else 'Unknown error', + error=str(error), ) async def evaluate_messages( self, - messages: list[LDMessage], + messages: List[LDMessage], response: ChatResponse, sampling_ratio: float = 1.0, ) -> Optional[JudgeResponse]: @@ -149,7 +143,7 @@ def get_provider(self) -> AIProvider: """ return self._ai_provider - def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> List[LDMessage]: """ Constructs evaluation messages by combining judge's config messages with input/output. @@ -160,7 +154,7 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l if not self._ai_config.messages: return [] - messages: list[LDMessage] = [] + messages: List[LDMessage] = [] for msg in self._ai_config.messages: # Interpolate message content with reserved variables content = self._interpolate_message(msg.content, { @@ -192,8 +186,7 @@ def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScor results: Dict[str, EvalScore] = {} if not data.get('evaluations') or not isinstance(data['evaluations'], dict): - if self._logger: - self._logger.warn('Invalid response: missing or invalid evaluations object') + self._logger.warning('Invalid response: missing or invalid evaluations object') return results evaluations = data['evaluations'] @@ -202,27 +195,24 @@ def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScor evaluation = evaluations.get(metric_key) if not evaluation or not isinstance(evaluation, dict): - if self._logger: - self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + self._logger.warning(f'Missing evaluation for metric key: {metric_key}') 
continue score = evaluation.get('score') reasoning = evaluation.get('reasoning') if not isinstance(score, (int, float)) or score < 0 or score > 1: - if self._logger: - self._logger.warn( - f'Invalid score evaluated for {metric_key}: {score}. ' - 'Score must be a number between 0 and 1 inclusive' - ) + self._logger.warning( + f'Invalid score evaluated for {metric_key}: {score}. ' + 'Score must be a number between 0 and 1 inclusive' + ) continue if not isinstance(reasoning, str): - if self._logger: - self._logger.warn( - f'Invalid reasoning evaluated for {metric_key}: {reasoning}. ' - 'Reasoning must be a string' - ) + self._logger.warning( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. ' + 'Reasoning must be a string' + ) continue results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) diff --git a/packages/core/ldai/judge/evaluation_schema_builder.py b/packages/core/ldai/judge/evaluation_schema_builder.py index c996f08..8fbc712 100644 --- a/packages/core/ldai/judge/evaluation_schema_builder.py +++ b/packages/core/ldai/judge/evaluation_schema_builder.py @@ -1,6 +1,6 @@ """Internal class for building dynamic evaluation response schemas.""" -from typing import Any, Dict +from typing import Any, Dict, List class EvaluationSchemaBuilder: diff --git a/packages/core/ldai/providers/ai_provider.py b/packages/core/ldai/providers/ai_provider.py index 2fec172..3e17221 100644 --- a/packages/core/ldai/providers/ai_provider.py +++ b/packages/core/ldai/providers/ai_provider.py @@ -1,7 +1,8 @@ """Abstract base class for AI providers.""" +import logging from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional from ldai.models import AIConfigKind, LDMessage from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse @@ -18,13 +19,13 @@ class AIProvider(ABC): for better extensibility and backwards compatibility. 
""" - def __init__(self, logger: Optional[Any] = None): + def __init__(self): """ Initialize the AI provider. - - :param logger: Optional logger for logging provider operations. + + Creates a logger for this provider instance. """ - self.logger = logger + self._logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}') async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ @@ -39,8 +40,7 @@ async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ - if self.logger: - self.logger.warn('invokeModel not implemented by this provider') + self._logger.warning('invokeModel not implemented by this provider') return ChatResponse( message=LDMessage(role='assistant', content=''), @@ -65,8 +65,7 @@ async def invoke_structured_model( :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data """ - if self.logger: - self.logger.warn('invokeStructuredModel not implemented by this provider') + self._logger.warning('invokeStructuredModel not implemented by this provider') return StructuredResponse( data={}, @@ -76,7 +75,7 @@ async def invoke_structured_model( @staticmethod @abstractmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': + async def create(ai_config: AIConfigKind) -> 'AIProvider': """ Static method that constructs an instance of the provider. @@ -84,7 +83,6 @@ async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIPr that accepts an AIConfigKind and returns a configured instance. 
:param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider :return: Configured provider instance """ raise NotImplementedError('Provider implementations must override the static create method') diff --git a/packages/core/ldai/providers/ai_provider_factory.py b/packages/core/ldai/providers/ai_provider_factory.py index 9f5a6e4..7c1dec2 100644 --- a/packages/core/ldai/providers/ai_provider_factory.py +++ b/packages/core/ldai/providers/ai_provider_factory.py @@ -1,7 +1,8 @@ """Factory for creating AIProvider instances based on the provider configuration.""" import importlib -from typing import Any, List, Literal, Optional, Type +import logging +from typing import List, Literal, Optional, Type from ldai.models import AIConfigKind from ldai.providers.ai_provider import AIProvider @@ -21,10 +22,11 @@ class AIProviderFactory: Factory for creating AIProvider instances based on the provider configuration. """ + _logger = logging.getLogger(__name__) + @staticmethod async def create( ai_config: AIConfigKind, - logger: Optional[Any] = None, default_ai_provider: Optional[SupportedAIProvider] = None, ) -> Optional[AIProvider]: """ @@ -34,7 +36,6 @@ async def create( Returns None if the provider is not supported. 
:param ai_config: The AI configuration - :param logger: Optional logger for logging provider initialization :param default_ai_provider: Optional default AI provider to use :return: AIProvider instance or None if not supported """ @@ -44,15 +45,14 @@ async def create( # Try each provider in order for provider_type in providers_to_try: - provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) + provider = await AIProviderFactory._try_create_provider(provider_type, ai_config) if provider: return provider # If no provider was successfully created, log a warning - if logger: - logger.warn( - f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" - ) + AIProviderFactory._logger.warning( + f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" + ) return None @staticmethod @@ -89,27 +89,24 @@ def _get_providers_to_try( async def _try_create_provider( provider_type: SupportedAIProvider, ai_config: AIConfigKind, - logger: Optional[Any] = None, ) -> Optional[AIProvider]: """ Try to create a provider of the specified type. :param provider_type: Type of provider to create :param ai_config: AI configuration - :param logger: Optional logger :return: AIProvider instance or None if creation failed """ # Handle built-in providers (part of this package) if provider_type == 'langchain': try: from ldai.providers.langchain import LangChainProvider - return await LangChainProvider.create(ai_config, logger) + return await LangChainProvider.create(ai_config) except ImportError as error: - if logger: - logger.warn( - f"Error creating LangChainProvider: {error}. " - f"Make sure langchain and langchain-core packages are installed." - ) + AIProviderFactory._logger.warning( + f"Error creating LangChainProvider: {error}. " + f"Make sure langchain and langchain-core packages are installed." 
+ ) return None # TODO: REL-10773 OpenAI provider @@ -125,7 +122,7 @@ async def _try_create_provider( package_name, provider_class_name = provider_mappings[provider_type] return await AIProviderFactory._create_provider( - package_name, provider_class_name, ai_config, logger + package_name, provider_class_name, ai_config ) @staticmethod @@ -133,7 +130,6 @@ async def _create_provider( package_name: str, provider_class_name: str, ai_config: AIConfigKind, - logger: Optional[Any] = None, ) -> Optional[AIProvider]: """ Create a provider instance dynamically. @@ -141,7 +137,6 @@ async def _create_provider( :param package_name: Name of the package containing the provider :param provider_class_name: Name of the provider class :param ai_config: AI configuration - :param logger: Optional logger :return: AIProvider instance or None if creation failed """ try: @@ -150,18 +145,16 @@ async def _create_provider( module = importlib.import_module(package_name) provider_class: Type[AIProvider] = getattr(module, provider_class_name) - provider = await provider_class.create(ai_config, logger) - if logger: - logger.debug( - f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " - f"with package {package_name}" - ) + provider = await provider_class.create(ai_config) + AIProviderFactory._logger.debug( + f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}" + ) return provider except (ImportError, AttributeError, Exception) as error: # If the provider is not available or creation fails, return None - if logger: - logger.warn( - f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " - f"with package {package_name}: {error}" - ) + AIProviderFactory._logger.warning( + f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}: {error}" + ) return None 
diff --git a/packages/langchain/ldai/providers/langchain/langchain_provider.py b/packages/langchain/ldai/providers/langchain/langchain_provider.py index ece7fbf..92b7a6b 100644 --- a/packages/langchain/ldai/providers/langchain/langchain_provider.py +++ b/packages/langchain/ldai/providers/langchain/langchain_provider.py @@ -1,5 +1,7 @@ """LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" +import json +import logging from typing import Any, Dict, List, Optional from langchain_core.language_models.chat_models import BaseChatModel @@ -18,27 +20,25 @@ class LangChainProvider(AIProvider): This provider integrates LangChain models with LaunchDarkly's tracking capabilities. """ - def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): + def __init__(self, llm: BaseChatModel): """ Initialize the LangChain provider. :param llm: LangChain BaseChatModel instance - :param logger: Optional logger for logging provider operations """ - super().__init__(logger) + super().__init__() self._llm = llm @staticmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': + async def create(ai_config: AIConfigKind) -> 'LangChainProvider': """ Static factory method to create a LangChain AIProvider from an AI configuration. :param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider :return: Configured LangChainProvider instance """ llm = await LangChainProvider.create_langchain_model(ai_config) - return LangChainProvider(llm, logger) + return LangChainProvider(llm) async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ @@ -63,16 +63,14 @@ async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: content = response.content else: # Log warning for non-string content (likely multimodal) - if self.logger: - self.logger.warn( - f"Multimodal response not supported, expecting a string. 
" - f"Content type: {type(response.content)}, Content: {response.content}" - ) + self._logger.warning( + f"Multimodal response not supported, expecting a string. " + f"Content type: {type(response.content)}, Content: {response.content}" + ) # Update metrics to reflect content loss metrics.success = False # Create the assistant message - from ldai.models import LDMessage assistant_message = LDMessage(role='assistant', content=content) return ChatResponse( @@ -80,10 +78,8 @@ async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: metrics=metrics, ) except Exception as error: - if self.logger: - self.logger.warn(f'LangChain model invocation failed: {error}') + self._logger.warning(f'LangChain model invocation failed: {error}') - from ldai.models import LDMessage return ChatResponse( message=LDMessage(role='assistant', content=''), metrics=LDAIMetrics(success=False, usage=None), @@ -114,7 +110,6 @@ async def invoke_structured_model( # Fallback: invoke normally and try to parse as JSON response_obj = await self._llm.ainvoke(langchain_messages) if isinstance(response_obj, AIMessage): - import json try: response = json.loads(response_obj.content) except json.JSONDecodeError: @@ -128,15 +123,13 @@ async def invoke_structured_model( usage=TokenUsage(total=0, input=0, output=0), ) - import json return StructuredResponse( data=response if isinstance(response, dict) else {'result': response}, raw_response=json.dumps(response) if not isinstance(response, str) else response, metrics=metrics, ) except Exception as error: - if self.logger: - self.logger.warn(f'LangChain structured model invocation failed: {error}') + self._logger.warning(f'LangChain structured model invocation failed: {error}') return StructuredResponse( data={}, @@ -191,9 +184,9 @@ def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: token_usage = response.response_metadata.get('token_usage') if token_usage: usage = TokenUsage( - total=token_usage.get('total_tokens', 0) or 
token_usage.get('totalTokens', 0) or 0, - input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, - output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, + total=token_usage.get('total_tokens') or token_usage.get('totalTokens') or 0, + input=token_usage.get('prompt_tokens') or token_usage.get('promptTokens') or 0, + output=token_usage.get('completion_tokens') or token_usage.get('completionTokens') or 0, ) # LangChain responses that complete successfully are considered successful by default diff --git a/packages/langchain/tests/test_langchain_provider.py b/packages/langchain/tests/test_langchain_provider.py index 3bb83a1..db1913f 100644 --- a/packages/langchain/tests/test_langchain_provider.py +++ b/packages/langchain/tests/test_langchain_provider.py @@ -133,17 +133,12 @@ async def test_invoke_model_with_multimodal_content_warning(self): ) mock_llm.ainvoke.return_value = mock_response - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) + provider = LangChainProvider(mock_llm) messages = [LDMessage(role='user', content='Describe this image')] response = await provider.invoke_model(messages) - # Should warn about multimodal content - mock_logger.warn.assert_called_once() - assert 'Multimodal response not supported' in str(mock_logger.warn.call_args) - - # Should mark as failure + # Should mark as failure due to multimodal content not being supported assert response.metrics.success is False assert response.message.content == '' @@ -153,16 +148,11 @@ async def test_invoke_model_with_exception(self): mock_llm = AsyncMock() mock_llm.ainvoke.side_effect = Exception('Model API error') - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) + provider = LangChainProvider(mock_llm) messages = [LDMessage(role='user', content='Hello')] response = await provider.invoke_model(messages) - # Should log the error - mock_logger.warn.assert_called_once() - 
assert 'LangChain model invocation failed' in str(mock_logger.warn.call_args) - # Should return failure response assert response.message.role == 'assistant' assert response.message.content == '' @@ -219,17 +209,12 @@ async def test_invoke_structured_model_with_exception(self): mock_llm = Mock() mock_llm.with_structured_output.side_effect = Exception('Structured output error') - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) + provider = LangChainProvider(mock_llm) messages = [LDMessage(role='user', content='Question')] schema = {'answer': 'string'} response = await provider.invoke_structured_model(messages, schema) - # Should log the error - mock_logger.warn.assert_called_once() - assert 'LangChain structured model invocation failed' in str(mock_logger.warn.call_args) - # Should return failure response assert response.data == {} assert response.raw_response == '' From 463145c3090b68f2266f3daae0aeeb45a502018f Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 22:43:25 +0100 Subject: [PATCH 32/37] linting --- packages/core/ldai/client.py | 4 +-- packages/core/ldai/providers/ai_provider.py | 2 +- .../ldai/providers/langchain/__init__.py | 1 - .../providers/langchain/langchain_provider.py | 35 +++++++++---------- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index a30f796..13c6523 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -160,7 +160,7 @@ async def create_judge( try: # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation extended_variables = dict(variables) if variables else {} - + # Warn if reserved variables are provided if variables: if 'message_history' in variables: @@ -171,7 +171,7 @@ async def create_judge( self._logger.warning( 'Variable "response_to_evaluate" is reserved for judge evaluation and will be overwritten' ) - + extended_variables['message_history'] = 
'{{message_history}}' extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' diff --git a/packages/core/ldai/providers/ai_provider.py b/packages/core/ldai/providers/ai_provider.py index 3e17221..637bb9d 100644 --- a/packages/core/ldai/providers/ai_provider.py +++ b/packages/core/ldai/providers/ai_provider.py @@ -22,7 +22,7 @@ class AIProvider(ABC): def __init__(self): """ Initialize the AI provider. - + Creates a logger for this provider instance. """ self._logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}') diff --git a/packages/langchain/ldai/providers/langchain/__init__.py b/packages/langchain/ldai/providers/langchain/__init__.py index 2e1a27a..822f049 100644 --- a/packages/langchain/ldai/providers/langchain/__init__.py +++ b/packages/langchain/ldai/providers/langchain/__init__.py @@ -3,4 +3,3 @@ from ldai.providers.langchain.langchain_provider import LangChainProvider __all__ = ['LangChainProvider'] - diff --git a/packages/langchain/ldai/providers/langchain/langchain_provider.py b/packages/langchain/ldai/providers/langchain/langchain_provider.py index 92b7a6b..22542fd 100644 --- a/packages/langchain/ldai/providers/langchain/langchain_provider.py +++ b/packages/langchain/ldai/providers/langchain/langchain_provider.py @@ -16,14 +16,14 @@ class LangChainProvider(AIProvider): """ LangChain implementation of AIProvider. - + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. """ def __init__(self, llm: BaseChatModel): """ Initialize the LangChain provider. - + :param llm: LangChain BaseChatModel instance """ super().__init__() @@ -33,7 +33,7 @@ def __init__(self, llm: BaseChatModel): async def create(ai_config: AIConfigKind) -> 'LangChainProvider': """ Static factory method to create a LangChain AIProvider from an AI configuration. 
- + :param ai_config: The LaunchDarkly AI configuration :return: Configured LangChainProvider instance """ @@ -43,7 +43,7 @@ async def create(ai_config: AIConfigKind) -> 'LangChainProvider': async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ Invoke the LangChain model with an array of messages. - + :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ @@ -92,7 +92,7 @@ async def invoke_structured_model( ) -> StructuredResponse: """ Invoke the LangChain model with structured output support. - + :param messages: Array of LDMessage objects representing the conversation :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data @@ -143,7 +143,7 @@ async def invoke_structured_model( def get_chat_model(self) -> BaseChatModel: """ Get the underlying LangChain model instance. - + :return: The LangChain BaseChatModel instance """ return self._llm @@ -152,10 +152,10 @@ def get_chat_model(self) -> BaseChatModel: def map_provider(ld_provider_name: str) -> str: """ Map LaunchDarkly provider names to LangChain provider names. - + This method enables seamless integration between LaunchDarkly's standardized provider naming and LangChain's naming conventions. - + :param ld_provider_name: LaunchDarkly provider name :return: LangChain provider name """ @@ -171,10 +171,10 @@ def map_provider(ld_provider_name: str) -> str: def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: """ Get AI metrics from a LangChain provider response. - + This method extracts token usage information and success status from LangChain responses and returns a LaunchDarkly LDAIMetrics object. 
- + :param response: The response from the LangChain model :return: LDAIMetrics with success status and token usage """ @@ -196,10 +196,10 @@ def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: """ Convert LaunchDarkly messages to LangChain messages. - + This helper method enables developers to work directly with LangChain message types while maintaining compatibility with LaunchDarkly's standardized message format. - + :param messages: List of LDMessage objects :return: List of LangChain message objects """ @@ -219,10 +219,10 @@ def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: """ Create a LangChain model from an AI configuration. - + This public helper method enables developers to initialize their own LangChain models using LaunchDarkly AI configurations. - + :param ai_config: The LaunchDarkly AI configuration :return: A configured LangChain BaseChatModel """ @@ -242,15 +242,15 @@ async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: except ImportError: # Fallback for older versions or different import path from langchain.chat_models.universal import init_chat_model - + # Map provider name langchain_provider = LangChainProvider.map_provider(provider) - + # Create model configuration model_kwargs = {**parameters} if langchain_provider: model_kwargs['model_provider'] = langchain_provider - + # Initialize the chat model (init_chat_model may be async or sync) result = init_chat_model(model_name, **model_kwargs) # Handle both sync and async initialization @@ -262,4 +262,3 @@ async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: 'langchain package is required for LangChainProvider. 
' 'Install it with: pip install langchain langchain-core' ) from e - From e75ccd7f3658185c9ca733d571a0eefa80d55349 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 22:53:13 +0100 Subject: [PATCH 33/37] fixes --- packages/core/ldai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index 13c6523..6b1bb13 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -187,7 +187,7 @@ async def create_judge( return AIJudge(judge_config, judge_config.tracker, provider) except Exception as error: - # Would log error if logger available + self._logger.error(f'Failed to create judge: {error}') return None async def _initialize_judges( From 47b3a2347b974c61e0b58b39a821d8fe55ae32d1 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 23:31:46 +0100 Subject: [PATCH 34/37] remove inline import. --- packages/core/ldai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index 6b1bb13..a775642 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -1,3 +1,4 @@ +import asyncio import logging from typing import Any, Dict, List, Optional, Tuple @@ -223,7 +224,6 @@ async def create_judge_for_config(judge_key: str): for judge_config in judge_configs ] - import asyncio results = await asyncio.gather(*judge_promises, return_exceptions=True) for result in results: From 30ec74d3c3a7472171a75001beeb8e6270ea7630 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 19 Nov 2025 23:46:35 +0100 Subject: [PATCH 35/37] remove uncessary logic --- packages/core/ldai/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index a775642..af259b7 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -48,7 +48,7 @@ def completion_config( ) config = 
AICompletionConfig( - enabled=bool(enabled), + enabled=enabled, model=model, messages=messages, provider=provider, @@ -105,7 +105,7 @@ def judge_config( evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) config = AIJudgeConfig( - enabled=bool(enabled), + enabled=enabled, evaluation_metric_keys=evaluation_metric_keys, model=model, messages=messages, @@ -553,7 +553,7 @@ def __evaluate_agent( final_instructions = instructions if instructions is not None else default_value.instructions return AIAgentConfig( - enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), + enabled=enabled, model=model or default_value.model, provider=provider or default_value.provider, instructions=final_instructions, From 163bcdfc0e8659fa201234daf3918323851fc168 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Thu, 20 Nov 2025 18:32:46 +0100 Subject: [PATCH 36/37] fixes --- packages/core/ldai/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/core/ldai/client.py b/packages/core/ldai/client.py index af259b7..2b314cf 100644 --- a/packages/core/ldai/client.py +++ b/packages/core/ldai/client.py @@ -402,10 +402,11 @@ def agent_configs( """ # Track multiple agents usage agent_count = len(agent_configs) + config_keys = [config.key for config in agent_configs] self._client.track( "$ld:ai:agent:function:multiple", context, - agent_count, + {"configKeys": config_keys}, agent_count ) From f0dcbc4014e4d92ab77636184b0299911d006ae7 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Thu, 20 Nov 2025 22:56:51 +0100 Subject: [PATCH 37/37] fixes --- ldai/testing/test_langchain_provider.py | 237 ------------------------ 1 file changed, 237 deletions(-) delete mode 100644 ldai/testing/test_langchain_provider.py diff --git a/ldai/testing/test_langchain_provider.py b/ldai/testing/test_langchain_provider.py deleted file mode 100644 index 3bb83a1..0000000 --- 
a/ldai/testing/test_langchain_provider.py +++ /dev/null @@ -1,237 +0,0 @@ -"""Tests for LangChain provider implementation.""" - -import pytest -from unittest.mock import AsyncMock, Mock - -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage - -from ldai.models import LDMessage -from ldai.providers.langchain import LangChainProvider -from ldai.tracker import TokenUsage - - -class TestMessageConversion: - """Test conversion between LD messages and LangChain messages.""" - - def test_convert_multiple_messages(self): - """Test converting a conversation with all message types.""" - ld_messages = [ - LDMessage(role='system', content='You are helpful'), - LDMessage(role='user', content='Hello'), - LDMessage(role='assistant', content='Hi there!'), - ] - lc_messages = LangChainProvider.convert_messages_to_langchain(ld_messages) - - assert len(lc_messages) == 3 - assert isinstance(lc_messages[0], SystemMessage) - assert isinstance(lc_messages[1], HumanMessage) - assert isinstance(lc_messages[2], AIMessage) - assert lc_messages[0].content == 'You are helpful' - assert lc_messages[1].content == 'Hello' - assert lc_messages[2].content == 'Hi there!' 
- - def test_convert_unsupported_role_raises_error(self): - """Test that unsupported message roles raise ValueError.""" - ld_messages = [LDMessage(role='function', content='Function result')] - - with pytest.raises(ValueError, match='Unsupported message role: function'): - LangChainProvider.convert_messages_to_langchain(ld_messages) - - -class TestMetricsExtraction: - """Test metrics extraction from LangChain response metadata.""" - - def test_extract_metrics_with_token_usage(self): - """Test extracting token usage from response metadata.""" - response = AIMessage( - content='Hello, world!', - response_metadata={ - 'token_usage': { - 'total_tokens': 100, - 'prompt_tokens': 60, - 'completion_tokens': 40, - } - } - ) - - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - assert metrics.success is True - assert metrics.usage is not None - assert metrics.usage.total == 100 - assert metrics.usage.input == 60 - assert metrics.usage.output == 40 - - def test_extract_metrics_with_camel_case_token_usage(self): - """Test extracting token usage with camelCase keys (some providers use this).""" - response = AIMessage( - content='Hello, world!', - response_metadata={ - 'token_usage': { - 'totalTokens': 150, - 'promptTokens': 90, - 'completionTokens': 60, - } - } - ) - - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - assert metrics.success is True - assert metrics.usage is not None - assert metrics.usage.total == 150 - assert metrics.usage.input == 90 - assert metrics.usage.output == 60 - - def test_extract_metrics_without_token_usage(self): - """Test metrics extraction when no token usage is available.""" - response = AIMessage(content='Hello, world!') - - metrics = LangChainProvider.get_ai_metrics_from_response(response) - - assert metrics.success is True - assert metrics.usage is None - - -class TestInvokeModel: - """Test model invocation with LangChain provider.""" - - @pytest.mark.asyncio - async def 
test_invoke_model_success(self): - """Test successful model invocation.""" - mock_llm = AsyncMock() - mock_response = AIMessage( - content='Hello, user!', - response_metadata={ - 'token_usage': { - 'total_tokens': 20, - 'prompt_tokens': 10, - 'completion_tokens': 10, - } - } - ) - mock_llm.ainvoke.return_value = mock_response - - provider = LangChainProvider(mock_llm) - messages = [LDMessage(role='user', content='Hello')] - - response = await provider.invoke_model(messages) - - assert response.message.role == 'assistant' - assert response.message.content == 'Hello, user!' - assert response.metrics.success is True - assert response.metrics.usage is not None - assert response.metrics.usage.total == 20 - - @pytest.mark.asyncio - async def test_invoke_model_with_multimodal_content_warning(self): - """Test that non-string content triggers warning and marks as failure.""" - mock_llm = AsyncMock() - mock_response = AIMessage( - content=['text', {'type': 'image'}], # Non-string content - response_metadata={'token_usage': {'total_tokens': 20}} - ) - mock_llm.ainvoke.return_value = mock_response - - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) - messages = [LDMessage(role='user', content='Describe this image')] - - response = await provider.invoke_model(messages) - - # Should warn about multimodal content - mock_logger.warn.assert_called_once() - assert 'Multimodal response not supported' in str(mock_logger.warn.call_args) - - # Should mark as failure - assert response.metrics.success is False - assert response.message.content == '' - - @pytest.mark.asyncio - async def test_invoke_model_with_exception(self): - """Test model invocation handles exceptions gracefully.""" - mock_llm = AsyncMock() - mock_llm.ainvoke.side_effect = Exception('Model API error') - - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) - messages = [LDMessage(role='user', content='Hello')] - - response = await 
provider.invoke_model(messages) - - # Should log the error - mock_logger.warn.assert_called_once() - assert 'LangChain model invocation failed' in str(mock_logger.warn.call_args) - - # Should return failure response - assert response.message.role == 'assistant' - assert response.message.content == '' - assert response.metrics.success is False - assert response.metrics.usage is None - - -class TestInvokeStructuredModel: - """Test structured output invocation.""" - - @pytest.mark.asyncio - async def test_invoke_structured_model_with_support(self): - """Test structured output when model supports with_structured_output.""" - mock_llm = Mock() - mock_structured_llm = AsyncMock() - mock_structured_llm.ainvoke.return_value = { - 'answer': 'Paris', - 'confidence': 0.95 - } - mock_llm.with_structured_output.return_value = mock_structured_llm - - provider = LangChainProvider(mock_llm) - messages = [LDMessage(role='user', content='What is the capital of France?')] - schema = {'answer': 'string', 'confidence': 'number'} - - response = await provider.invoke_structured_model(messages, schema) - - assert response.data == {'answer': 'Paris', 'confidence': 0.95} - assert response.metrics.success is True - mock_llm.with_structured_output.assert_called_once_with(schema) - - @pytest.mark.asyncio - async def test_invoke_structured_model_without_support_json_fallback(self): - """Test structured output fallback to JSON parsing when not supported.""" - mock_llm = AsyncMock() - # Model doesn't have with_structured_output - delattr(mock_llm, 'with_structured_output') if hasattr(mock_llm, 'with_structured_output') else None - - mock_response = AIMessage(content='{"answer": "Berlin", "confidence": 0.9}') - mock_llm.ainvoke.return_value = mock_response - - provider = LangChainProvider(mock_llm) - messages = [LDMessage(role='user', content='What is the capital of Germany?')] - schema = {'answer': 'string', 'confidence': 'number'} - - response = await provider.invoke_structured_model(messages, 
schema) - - assert response.data == {'answer': 'Berlin', 'confidence': 0.9} - assert response.metrics.success is True - - @pytest.mark.asyncio - async def test_invoke_structured_model_with_exception(self): - """Test structured output handles exceptions gracefully.""" - mock_llm = Mock() - mock_llm.with_structured_output.side_effect = Exception('Structured output error') - - mock_logger = Mock() - provider = LangChainProvider(mock_llm, logger=mock_logger) - messages = [LDMessage(role='user', content='Question')] - schema = {'answer': 'string'} - - response = await provider.invoke_structured_model(messages, schema) - - # Should log the error - mock_logger.warn.assert_called_once() - assert 'LangChain structured model invocation failed' in str(mock_logger.warn.call_args) - - # Should return failure response - assert response.data == {} - assert response.raw_response == '' - assert response.metrics.success is False -