From bb7b88c4fbb6877b9160f201c56589d2e15d8aa6 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Tue, 20 Jan 2026 20:47:43 +0200 Subject: [PATCH 01/18] add new models enums --- statgpt/common/config/llm_models.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/statgpt/common/config/llm_models.py b/statgpt/common/config/llm_models.py index 56769f82..4972cd44 100644 --- a/statgpt/common/config/llm_models.py +++ b/statgpt/common/config/llm_models.py @@ -30,6 +30,10 @@ class LLMModelsEnum(StrEnum): GPT_4_1_MINI_2025_04_14 = "gpt-4.1-mini-2025-04-14" GPT_4_1_NANO_2025_04_14 = "gpt-4.1-nano-2025-04-14" + # GPT-5 models + GPT_5_1_2025_11_13 = "gpt-5.1-2025-11-13" + GPT_5_2_2025_12_11 = "gpt-5.2-2025-12-11" + @property def deployment_id(self) -> str: return os.getenv(f"LLM_MODELS_{self.name}", self.value) @@ -42,3 +46,11 @@ def is_gpt_41_family(self) -> bool: LLMModelsEnum.GPT_4_1_MINI_2025_04_14, LLMModelsEnum.GPT_4_1_NANO_2025_04_14, } + + @property + def is_gpt_5_family(self) -> bool: + """Check if the model belongs to the GPT-5 family.""" + return self in { + LLMModelsEnum.GPT_5_1_2025_11_13, + LLMModelsEnum.GPT_5_2_2025_12_11, + } From 45362233fc5b70803a15b2ccddebfa604da75e16 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Mon, 2 Feb 2026 19:27:08 +0200 Subject: [PATCH 02/18] add gpt 5 family support --- statgpt/common/config/__init__.py | 2 +- statgpt/common/config/llm_models.py | 22 ++++++++++++++++++++++ statgpt/common/schemas/model_config.py | 26 ++++++++++++++++++++++---- statgpt/common/settings/langchain.py | 25 +++++++++++++++++++------ statgpt/common/utils/models.py | 18 +++++++++++++++--- 5 files changed, 79 insertions(+), 14 deletions(-) diff --git a/statgpt/common/config/__init__.py b/statgpt/common/config/__init__.py index d39b9f99..06a5c5a2 100644 --- a/statgpt/common/config/__init__.py +++ b/statgpt/common/config/__init__.py @@ -1,4 +1,4 @@ -from .llm_models import EmbeddingModelsEnum, LLMModelsEnum +from .llm_models import EmbeddingModelsEnum, LLMModelsEnum, ReasoningEffortEnum, VerbosityEnum from .logging import LoggingConfig, logger, multiline_logger from .versions import Versions diff --git a/statgpt/common/config/llm_models.py b/statgpt/common/config/llm_models.py index 4972cd44..5c2523bf 100644 --- a/statgpt/common/config/llm_models.py +++ b/statgpt/common/config/llm_models.py @@ -6,6 +6,26 @@ class EmbeddingModelsEnum(StrEnum): TEXT_EMBEDDING_3_LARGE = "text-embedding-3-large" +class ReasoningEffortEnum(StrEnum): + """Reasoning effort levels for GPT-5 models.""" + + NONE = "none" # No reasoning mode - standard inference + MINIMAL = "minimal" + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + XHIGH = "xhigh" + + +# NOTE: currently not used and not supported by LangChain, kept for future compatibility +class VerbosityEnum(StrEnum): + """Output verbosity levels for GPT-5 models.""" + + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + + class LLMModelsEnum(StrEnum): # Gemini models GEMINI_2_0_FLASH_LITE_001 = "gemini-2.0-flash-lite-001" @@ -31,6 +51,7 @@ class LLMModelsEnum(StrEnum): GPT_4_1_NANO_2025_04_14 = "gpt-4.1-nano-2025-04-14" # GPT-5 models + GPT_5_MINI_2025_08_07 = "gpt-5-mini-2025-08-07" GPT_5_1_2025_11_13 = "gpt-5.1-2025-11-13" GPT_5_2_2025_12_11 = "gpt-5.2-2025-12-11" @@ -51,6 +72,7 @@ def is_gpt_41_family(self) -> bool: def is_gpt_5_family(self) -> bool: """Check if the model belongs to the GPT-5 family.""" return self in { + LLMModelsEnum.GPT_5_MINI_2025_08_07, LLMModelsEnum.GPT_5_1_2025_11_13, LLMModelsEnum.GPT_5_2_2025_12_11, } diff --git a/statgpt/common/schemas/model_config.py b/statgpt/common/schemas/model_config.py index 20c14078..ad8125db 100644 --- a/statgpt/common/schemas/model_config.py +++ b/statgpt/common/schemas/model_config.py @@ -1,6 +1,11 @@ from pydantic import Field -from statgpt.common.config import EmbeddingModelsEnum, LLMModelsEnum +from statgpt.common.config import ( + EmbeddingModelsEnum, + LLMModelsEnum, + ReasoningEffortEnum, + VerbosityEnum, +) from statgpt.common.settings.langchain import langchain_settings from .base import BaseYamlModel @@ -10,7 +15,8 @@ class BaseModelConfig(BaseYamlModel): """Base config for LLM and embeddings models configs.""" api_version: str = Field( - default=langchain_settings.default_api_version, description="API version for the model" + default=langchain_settings.default_api_version, + description="API version for the model", ) @@ -34,12 +40,24 @@ class LLMModelConfig(BaseModelConfig): default=langchain_settings.default_temperature, description=( "The temperature of the model. 0.0 means deterministic output, higher values mean more" - " randomness." + " randomness. Note: Not supported by GPT-5 models." ), ) seed: int | None = Field( default=langchain_settings.default_seed, description=( - "The seed of the model. If set, the model will produce the same output for the same input." + "The seed of the model. If set, the model will produce the same output for the same input. " + "Note: Not supported by GPT-5 models." ), ) + reasoning_effort: ReasoningEffortEnum | None = Field( + default=langchain_settings.default_reasoning_effort, + description=( + "Reasoning effort level for GPT-5 models. Ignored for non-reasoning models. " + "Supports: none, minimal, low, medium, high, xhigh." + ), + ) + verbosity: VerbosityEnum | None = Field( + default=langchain_settings.default_verbosity, + description=("Output verbosity for GPT-5 models (low/medium/high). "), + ) diff --git a/statgpt/common/settings/langchain.py b/statgpt/common/settings/langchain.py index 52f2f8fe..491f1e6d 100644 --- a/statgpt/common/settings/langchain.py +++ b/statgpt/common/settings/langchain.py @@ -1,10 +1,13 @@ -from typing import Optional - -from langchain import globals as lc_globals +from langchain_core import globals as lc_globals from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict -from statgpt.common.config.llm_models import EmbeddingModelsEnum, LLMModelsEnum +from statgpt.common.config.llm_models import ( + EmbeddingModelsEnum, + LLMModelsEnum, + ReasoningEffortEnum, + VerbosityEnum, +) class LangChainSettings(BaseSettings): @@ -21,7 +24,7 @@ class LangChainSettings(BaseSettings): ) default_model: LLMModelsEnum = Field( - default=LLMModelsEnum.GPT_4_1_2025_04_14, + default=LLMModelsEnum.GPT_5_2_2025_12_11, description="Default LLM model for LangChain", ) @@ -35,11 +38,21 @@ class LangChainSettings(BaseSettings): description="Default API version for Azure OpenAI", ) - default_seed: Optional[int] = Field( + default_seed: int | None = Field( default=None, description="Default seed for reproducible outputs", ) + default_reasoning_effort: ReasoningEffortEnum | None = Field( + default=ReasoningEffortEnum.NONE, + description="Default reasoning effort for GPT-5 models (none/minimal/low/medium/high/xhigh)", + ) + + default_verbosity: VerbosityEnum | None = Field( + default=VerbosityEnum.LOW, + description="Default verbosity for GPT-5 models (low/medium/high). None means use model default.", + ) + # Debugging settings verbose: bool = Field(default=False, description="Enable verbose mode for LangChain") diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index fc113b4c..e4bbd62c 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -4,6 +4,7 @@ from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings from pydantic import SecretStr +from statgpt.common.config import ReasoningEffortEnum from statgpt.common.config.logging import multiline_logger as logger from statgpt.common.schemas import EmbeddingsModelConfig, LLMModelConfig from statgpt.common.settings.dial import dial_settings @@ -26,13 +27,24 @@ def get_chat_model( azure_endpoint=azure_endpoint, api_version=model_config.api_version, azure_deployment=model_config.deployment.deployment_id, - temperature=model_config.temperature, - seed=model_config.seed, max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged timeout=timeout, # timeouts are crucial! ) - params.update(kwargs) # update default params + + if model_config.deployment.is_gpt_5_family: + # GPT-5: use reasoning_effort, temperature only allowed with reasoning_effort=none + if model_config.reasoning_effort is not None: + params["reasoning_effort"] = model_config.reasoning_effort + if model_config.reasoning_effort == ReasoningEffortEnum.NONE: + params["temperature"] = 0 + params.update({k: v for k, v in kwargs.items() if k not in ("temperature", "seed")}) + else: + # Legacy models: use temperature and seed + params["temperature"] = model_config.temperature + if model_config.seed is not None: + params["seed"] = model_config.seed + params.update(kwargs) if model_config.deployment.is_gpt_41_family: callback = BrokenResponseInterceptor(regex_pattern=r'\s{5,}') From 615702a97c2c30475466e54436780224272c7784 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Mon, 2 Feb 2026 19:27:18 +0200 Subject: [PATCH 03/18] upd default configs --- configurations/clients/sample/channels.yaml | 4 +- configurations/clients/sample/tools.yaml | 43 +++++++++++++-------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/configurations/clients/sample/channels.yaml b/configurations/clients/sample/channels.yaml index de37030d..be2390b8 100644 --- a/configurations/clients/sample/channels.yaml +++ b/configurations/clients/sample/channels.yaml @@ -18,7 +18,9 @@ channels: datasets, as it contains data directly provided by national statistical agencies. llm_model_config: - deployment: "gpt-4.1-2025-04-14" + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" out_of_scope: use_general_topics_blacklist: true domain: "Statistics, economics and SDMX." diff --git a/configurations/clients/sample/tools.yaml b/configurations/clients/sample/tools.yaml index d61b14ef..cee5ffa4 100644 --- a/configurations/clients/sample/tools.yaml +++ b/configurations/clients/sample/tools.yaml @@ -1,29 +1,38 @@ _available_datasets_call_id: &available_datasets_call_id "call_EBJJeaOMKeCzm8h378ubURQN" _available_terms_tool_call_id: &availableTermsCallId "call_EBJJeaOMKeCzm8h378ubU003" -_data_query_gpt_41_models: &data-query-gpt-41-models +_data_query_gpt_5_models: &data-query-gpt-5-models datasetsSelectionModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" dimensionsSelectionModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" indicatorsSelectionModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" incompleteQueriesModelConfig: - deployment: "gpt-4.1-2025-04-14" + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" groupExpanderModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" namedEntitiesModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" queryNormalizationModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" timePeriodModelConfig: - deployment: "gpt-4.1-2025-04-14" - temperature: 0.0 + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" _relevancy_prompts: &relevancy_prompts systemMessage: |- You are an expert in statistical indicators. @@ -119,7 +128,7 @@ tools: namedEntitiesToRemove: ["Country/Reference area", "Counterpart area/country"] prompts: relevancyPrompts: *relevancy_prompts - llmModels: *data-query-gpt-41-models + llmModels: *data-query-gpt-5-models attachments: customTable: enabledStr: "True" From 9e98dcdec635f9f47c4ae4dc89768d4fb6f106dd Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Tue, 3 Feb 2026 15:20:08 +0200 Subject: [PATCH 04/18] change default model back to 4.1 --- statgpt/common/settings/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statgpt/common/settings/langchain.py b/statgpt/common/settings/langchain.py index 491f1e6d..bd355979 100644 --- a/statgpt/common/settings/langchain.py +++ b/statgpt/common/settings/langchain.py @@ -24,7 +24,7 @@ class LangChainSettings(BaseSettings): ) default_model: LLMModelsEnum = Field( - default=LLMModelsEnum.GPT_5_2_2025_12_11, + default=LLMModelsEnum.GPT_4_1_2025_04_14, description="Default LLM model for LangChain", ) From b48cf3b454bf3bb4e7030ab864647312568b01d8 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Tue, 3 Feb 2026 15:43:53 +0200 Subject: [PATCH 05/18] set default to 5.1 --- statgpt/common/settings/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statgpt/common/settings/langchain.py b/statgpt/common/settings/langchain.py index bd355979..55a63d73 100644 --- a/statgpt/common/settings/langchain.py +++ b/statgpt/common/settings/langchain.py @@ -24,7 +24,7 @@ class LangChainSettings(BaseSettings): ) default_model: LLMModelsEnum = Field( - default=LLMModelsEnum.GPT_4_1_2025_04_14, + default=LLMModelsEnum.GPT_5_1_2025_11_13, description="Default LLM model for LangChain", ) From 3fe8ff678bba469b7820daa5946a8c5e47a92ee7 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Tue, 3 Feb 2026 15:47:24 +0200 Subject: [PATCH 06/18] change default to 5.2 --- statgpt/common/settings/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statgpt/common/settings/langchain.py b/statgpt/common/settings/langchain.py index 55a63d73..491f1e6d 100644 --- a/statgpt/common/settings/langchain.py +++ b/statgpt/common/settings/langchain.py @@ -24,7 +24,7 @@ class LangChainSettings(BaseSettings): ) default_model: LLMModelsEnum = Field( - default=LLMModelsEnum.GPT_5_1_2025_11_13, + default=LLMModelsEnum.GPT_5_2_2025_12_11, description="Default LLM model for LangChain", ) From b5ee249d85eef56a4fd163e961f56ee51fbfe905 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Tue, 3 Feb 2026 16:13:08 +0200 Subject: [PATCH 07/18] upd defaults llms for hybrid search --- statgpt/common/schemas/data_query_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statgpt/common/schemas/data_query_tool.py b/statgpt/common/schemas/data_query_tool.py index 0dcaf981..6cd35155 100644 --- a/statgpt/common/schemas/data_query_tool.py +++ b/statgpt/common/schemas/data_query_tool.py @@ -170,11 +170,11 @@ class HybridSearchConfig(BaseYamlModel): normalize_model_config: LLMModelConfig = Field( description="LLM Model used for normalization", - default_factory=lambda: LLMModelConfig(deployment=LLMModelsEnum.GPT_4_1_MINI_2025_04_14), + default_factory=lambda: LLMModelConfig(deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07), ) harmonize_model_config: LLMModelConfig = Field( description="LLM Model used for harmonization", - default_factory=lambda: LLMModelConfig(deployment=LLMModelsEnum.GPT_4_1_MINI_2025_04_14), + default_factory=lambda: LLMModelConfig(deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07), ) # ~~~~~~~~~~ Search config ~~~~~~~~~~ From e5f044c7b8ebb681f951f15f2401703753898a2c Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Tue, 3 Feb 2026 19:37:17 +0200 Subject: [PATCH 08/18] upd default llms for hybrid search --- statgpt/common/schemas/data_query_tool.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/statgpt/common/schemas/data_query_tool.py b/statgpt/common/schemas/data_query_tool.py index 6cd35155..1bf81fb3 100644 --- a/statgpt/common/schemas/data_query_tool.py +++ b/statgpt/common/schemas/data_query_tool.py @@ -1,7 +1,7 @@ from pydantic import Field, PositiveInt, TypeAdapter, field_validator from pydantic_core.core_schema import FieldValidationInfo -from statgpt.common.config import LLMModelsEnum +from statgpt.common.config import LLMModelsEnum, ReasoningEffortEnum, VerbosityEnum from statgpt.common.config.utils import replace_env from .base import BaseYamlModel, SystemUserPrompt @@ -170,11 +170,19 @@ class HybridSearchConfig(BaseYamlModel): normalize_model_config: LLMModelConfig = Field( description="LLM Model used for normalization", - default_factory=lambda: LLMModelConfig(deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07), + default_factory=lambda: LLMModelConfig( + deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07, + reasoning_effort=ReasoningEffortEnum.NONE, + verbosity=VerbosityEnum.LOW, + ), ) harmonize_model_config: LLMModelConfig = Field( description="LLM Model used for harmonization", - default_factory=lambda: LLMModelConfig(deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07), + default_factory=lambda: LLMModelConfig( + deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07, + reasoning_effort=ReasoningEffortEnum.NONE, + verbosity=VerbosityEnum.LOW, + ), ) # ~~~~~~~~~~ Search config ~~~~~~~~~~ From a9960adafd61c77a475c818dd40cab822a8182d1 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Wed, 4 Feb 2026 10:27:05 +0200 Subject: [PATCH 09/18] switch hybrid indexing to minimal effort --- statgpt/common/schemas/data_query_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statgpt/common/schemas/data_query_tool.py b/statgpt/common/schemas/data_query_tool.py index 1bf81fb3..57306b0f 100644 --- a/statgpt/common/schemas/data_query_tool.py +++ b/statgpt/common/schemas/data_query_tool.py @@ -172,7 +172,7 @@ class HybridSearchConfig(BaseYamlModel): description="LLM Model used for normalization", default_factory=lambda: LLMModelConfig( deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07, - reasoning_effort=ReasoningEffortEnum.NONE, + reasoning_effort=ReasoningEffortEnum.MINIMAL, verbosity=VerbosityEnum.LOW, ), ) @@ -180,7 +180,7 @@ class HybridSearchConfig(BaseYamlModel): description="LLM Model used for harmonization", default_factory=lambda: LLMModelConfig( deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07, - reasoning_effort=ReasoningEffortEnum.NONE, + reasoning_effort=ReasoningEffortEnum.MINIMAL, verbosity=VerbosityEnum.LOW, ), ) From d418829b3c2435e81d7a2f64f1af401076eef014 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Wed, 4 Feb 2026 10:27:32 +0200 Subject: [PATCH 10/18] set temp to 1 for other reasoning levels --- statgpt/common/utils/models.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index e4bbd62c..4ac2339b 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -33,11 +33,16 @@ def get_chat_model( ) if model_config.deployment.is_gpt_5_family: - # GPT-5: use reasoning_effort, temperature only allowed with reasoning_effort=none + # GPT-5: use reasoning_effort parameter if model_config.reasoning_effort is not None: params["reasoning_effort"] = model_config.reasoning_effort if model_config.reasoning_effort == ReasoningEffortEnum.NONE: + # reasoning_effort=none: use temperature=0 for deterministic output params["temperature"] = 0 + else: + # NOTE: Temporarily set temperature=1 for reasoning modes (minimal/low/medium/high/xhigh) + # TODO: Remove this once Azure OpenAI API is upgraded to properly handle reasoning modes without temperature + params["temperature"] = 1 params.update({k: v for k, v in kwargs.items() if k not in ("temperature", "seed")}) else: # Legacy models: use temperature and seed From ac24314b3d73fc8ab8a2dd98e3f51d547574b222 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Thu, 19 Feb 2026 10:50:21 +0200 Subject: [PATCH 11/18] remove unused import --- statgpt/common/utils/models.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index 8a9c3a6e..5d487c9b 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -4,7 +4,6 @@ from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings from pydantic import SecretStr -from statgpt.common.config import ReasoningEffortEnum from statgpt.common.config.logging import multiline_logger as logger from statgpt.common.schemas import EmbeddingsModelConfig, LLMModelConfig from statgpt.common.settings.dial import dial_settings @@ -30,15 +29,19 @@ def get_chat_model( timeout=timeout, # timeouts are crucial! ) - params.update(model_config.model_dump(mode="json", exclude_none=True, exclude={"deployment"})) + params.update( + model_config.model_dump(mode="json", exclude_none=True, exclude={"deployment"}) + ) if model_config.deployment.is_gpt_41_family: - callback = BrokenResponseInterceptor(regex_pattern=r'\s{5,}') - params.setdefault('callbacks', []).append(callback) + callback = BrokenResponseInterceptor(regex_pattern=r"\s{5,}") + params.setdefault("callbacks", []).append(callback) - api_key_log = f'{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}' + api_key_log = ( + f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" + ) logger.info( - f'creating langchain LLM with the following params: {params}, Api key: {api_key_log}' + f"creating langchain LLM with the following params: {params}, Api key: {api_key_log}" ) return AzureChatOpenAI.model_validate(params) @@ -57,8 +60,10 @@ def get_embeddings_model( max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged ) - api_key_log = f'{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}' + api_key_log = ( + f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" + ) logger.info( - f'creating langchain embeddings with the following params: {params}, Api key: {api_key_log}' + f"creating langchain embeddings with the following params: {params}, Api key: {api_key_log}" ) return AzureOpenAIEmbeddings.model_validate(params) From eccd61c859419f5a58e8472f4316da24eb5c702f Mon Sep 17 00:00:00 2001 From: bodiakap Date: Thu, 19 Feb 2026 11:16:48 +0200 Subject: [PATCH 12/18] Potential fix for code scanning alert no. 48: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- statgpt/common/utils/models.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index 5d487c9b..173e1148 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -37,11 +37,8 @@ def get_chat_model( callback = BrokenResponseInterceptor(regex_pattern=r"\s{5,}") params.setdefault("callbacks", []).append(callback) - api_key_log = ( - f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" - ) logger.info( - f"creating langchain LLM with the following params: {params}, Api key: {api_key_log}" + f"creating langchain LLM with the following params: {params}" ) return AzureChatOpenAI.model_validate(params) @@ -60,10 +57,7 @@ def get_embeddings_model( max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged ) - api_key_log = ( - f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" - ) logger.info( - f"creating langchain embeddings with the following params: {params}, Api key: {api_key_log}" + f"creating langchain embeddings with the following params: {params}" ) return AzureOpenAIEmbeddings.model_validate(params) From 7a565e477d0b6a7effaf2344e5aba4e5655d9214 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Thu, 19 Feb 2026 11:21:30 +0200 Subject: [PATCH 13/18] reformat --- statgpt/common/utils/models.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index 5d487c9b..ae3dc7db 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -29,17 +29,13 @@ def get_chat_model( timeout=timeout, # timeouts are crucial! ) - params.update( - model_config.model_dump(mode="json", exclude_none=True, exclude={"deployment"}) - ) + params.update(model_config.model_dump(mode="json", exclude_none=True, exclude={"deployment"})) if model_config.deployment.is_gpt_41_family: callback = BrokenResponseInterceptor(regex_pattern=r"\s{5,}") params.setdefault("callbacks", []).append(callback) - api_key_log = ( - f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" - ) + api_key_log = f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" logger.info( f"creating langchain LLM with the following params: {params}, Api key: {api_key_log}" ) @@ -60,9 +56,7 @@ def get_embeddings_model( max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged ) - api_key_log = ( - f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" - ) + api_key_log = f"{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}" logger.info( f"creating langchain embeddings with the following params: {params}, Api key: {api_key_log}" ) From 6d39a703d67888b7fded393d278c56274578a199 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Thu, 19 Feb 2026 11:30:13 +0200 Subject: [PATCH 14/18] reformat --- statgpt/common/utils/models.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index 16262036..60eaead1 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -35,9 +35,7 @@ def get_chat_model( callback = BrokenResponseInterceptor(regex_pattern=r"\s{5,}") params.setdefault("callbacks", []).append(callback) - logger.info( - f"creating langchain LLM with the following params: {params}" - ) + logger.info(f"creating langchain LLM with the following params: {params}") return AzureChatOpenAI.model_validate(params) @@ -55,7 +53,5 @@ def get_embeddings_model( max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged ) - logger.info( - f"creating langchain embeddings with the following params: {params}" - ) + logger.info(f"creating langchain embeddings with the following params: {params}") return AzureOpenAIEmbeddings.model_validate(params) From 1b431d9c72627f090b2ac2f361dc857bc767c2d2 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Thu, 19 Feb 2026 11:58:01 +0200 Subject: [PATCH 15/18] add temperature to configs --- configurations/clients/sample/channels.yaml | 6 +++++- configurations/clients/sample/tools.yaml | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/configurations/clients/sample/channels.yaml b/configurations/clients/sample/channels.yaml index b844c86d..efad7647 100644 --- a/configurations/clients/sample/channels.yaml +++ b/configurations/clients/sample/channels.yaml @@ -21,9 +21,13 @@ channels: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 out_of_scope: llm_model_config: - deployment: "gpt-4.1-2025-04-14" + deployment: "gpt-5.2-2025-12-11" + reasoningEffort: "none" + verbosity: "low" + temperature: 1 use_general_topics_blacklist: true domain: "Statistics, economics and SDMX." token_usage: diff --git a/configurations/clients/sample/tools.yaml b/configurations/clients/sample/tools.yaml index cee5ffa4..fe7b226c 100644 --- a/configurations/clients/sample/tools.yaml +++ b/configurations/clients/sample/tools.yaml @@ -5,34 +5,42 @@ _data_query_gpt_5_models: &data-query-gpt-5-models deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 dimensionsSelectionModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 indicatorsSelectionModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 incompleteQueriesModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 groupExpanderModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 namedEntitiesModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 queryNormalizationModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 timePeriodModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" + temperature: 1 _relevancy_prompts: &relevancy_prompts systemMessage: |- You are an expert in statistical indicators. From 0c48f10191c7428690679fa7e287175f2705be33 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Thu, 19 Feb 2026 13:16:34 +0200 Subject: [PATCH 16/18] add seed to configs --- configurations/clients/sample/channels.yaml | 2 ++ configurations/clients/sample/tools.yaml | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/configurations/clients/sample/channels.yaml b/configurations/clients/sample/channels.yaml index efad7647..bde04ce9 100644 --- a/configurations/clients/sample/channels.yaml +++ b/configurations/clients/sample/channels.yaml @@ -22,12 +22,14 @@ channels: reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null out_of_scope: llm_model_config: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null use_general_topics_blacklist: true domain: "Statistics, economics and SDMX." token_usage: diff --git a/configurations/clients/sample/tools.yaml b/configurations/clients/sample/tools.yaml index fe7b226c..43125c64 100644 --- a/configurations/clients/sample/tools.yaml +++ b/configurations/clients/sample/tools.yaml @@ -6,41 +6,49 @@ _data_query_gpt_5_models: &data-query-gpt-5-models reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null dimensionsSelectionModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" - temperature: 1 + temperature: 1 + seed: null indicatorsSelectionModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null incompleteQueriesModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null groupExpanderModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null namedEntitiesModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null queryNormalizationModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null timePeriodModelConfig: deployment: "gpt-5.2-2025-12-11" reasoningEffort: "none" verbosity: "low" temperature: 1 + seed: null _relevancy_prompts: &relevancy_prompts systemMessage: |- You are an expert in statistical indicators. From 5c062bebd8cb6954dce086e4e2a52ee546aa76b5 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Mon, 23 Feb 2026 12:08:47 +0200 Subject: [PATCH 17/18] change default model back to 4.1 --- statgpt/common/settings/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statgpt/common/settings/langchain.py b/statgpt/common/settings/langchain.py index 491f1e6d..bd355979 100644 --- a/statgpt/common/settings/langchain.py +++ b/statgpt/common/settings/langchain.py @@ -24,7 +24,7 @@ class LangChainSettings(BaseSettings): ) default_model: LLMModelsEnum = Field( - default=LLMModelsEnum.GPT_5_2_2025_12_11, + default=LLMModelsEnum.GPT_4_1_2025_04_14, description="Default LLM model for LangChain", ) From 6a47f431e3a5613e8d78572d50450dfddcaa5136 Mon Sep 17 00:00:00 2001 From: Bahdan Kapionkin Date: Mon, 23 Feb 2026 14:12:16 +0200 Subject: [PATCH 18/18] fix: add temperature for indexing models --- statgpt/common/schemas/data_query_tool.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/statgpt/common/schemas/data_query_tool.py b/statgpt/common/schemas/data_query_tool.py index 57306b0f..a47181a9 100644 --- a/statgpt/common/schemas/data_query_tool.py +++ b/statgpt/common/schemas/data_query_tool.py @@ -174,6 +174,7 @@ class HybridSearchConfig(BaseYamlModel): deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07, reasoning_effort=ReasoningEffortEnum.MINIMAL, verbosity=VerbosityEnum.LOW, + temperature=1, ), ) harmonize_model_config: LLMModelConfig = Field( @@ -182,6 +183,7 @@ class HybridSearchConfig(BaseYamlModel): deployment=LLMModelsEnum.GPT_5_MINI_2025_08_07, reasoning_effort=ReasoningEffortEnum.MINIMAL, verbosity=VerbosityEnum.LOW, + temperature=1, ), )