cot_proxy/.env.example at main · bold84/cot_proxy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Port on the host machine on which the proxy service is exposed.
HOST_PORT=3000

# Base URL of the target API endpoint used by the proxy.
# The host name below is a placeholder; replace it with your model server's address.
TARGET_BASE_URL=http://your-model-server:8080/

# Enable debug logging. Allowed values: true, false.
DEBUG=false

# Timeout for API requests, in seconds (3000 seconds = 50 minutes).
# NOTE(review): confirm that such a long timeout is the intended example value.
API_REQUEST_TIMEOUT=3000

# Per-model LLM parameter overrides.
# Format: "model=MODEL_NAME,param1=val1,param2=val2;model=ANOTHER_MODEL,param3=val3"
#   - entries for different models are separated by ';'
#   - key=value pairs within one entry are separated by ','
#   - the quotes above only delimit the format string; the active value below is written without them
# Special parameters for think tag stripping:
#   enable_think_tag_filtering, think_tag_start, think_tag_end
# Other parameters that appear in the examples in this file:
#   upstream_model_name, append_to_last_user_message
#   (presumably interpreted by the proxy rather than passed through as sampling parameters; verify against the proxy docs)
# Example (active):
LLM_PARAMS=model=default,temperature=0.7,enable_think_tag_filtering=true,think_tag_start=<think>,think_tag_end=</think>

# Example for Qwen3-32B-Non-Thinking with specific parameters, removing the think tags since they are not needed anyway:
# LLM_PARAMS=model=Qwen3-32B-Non-Thinking,upstream_model_name=Qwen3-32B,temperature=0.7,top_k=20,top_p=0.8,enable_think_tag_filtering=true,think_tag_start=<think>,think_tag_end=</think>,append_to_last_user_message=\n\n/no_think

# Example for Qwen3-32B-Thinking with specific parameters, keeping the think tags:
# LLM_PARAMS=model=Qwen3-32B-Thinking,upstream_model_name=Qwen3-32B,temperature=0.6,top_k=20,top_p=0.95,append_to_last_user_message=\n\n/think

# Example for Qwen3-32B-Thinking with specific parameters, removing the think tags:
# LLM_PARAMS=model=Qwen3-32B-Thinking,upstream_model_name=Qwen3-32B,temperature=0.6,top_k=20,top_p=0.95,append_to_last_user_message=\n\n/think,enable_think_tag_filtering=true,think_tag_start=<think>,think_tag_end=</think>

# Optional: global default start tag for stripping thought processes.
# A model-specific think_tag_start in LLM_PARAMS overrides this value.
# Defaults to '<think>' if not set.
# NOTE(review): '<' and '>' are shell redirection characters; if this file is ever
# sourced by a shell instead of being read by an env-file loader, the value would need quoting.
THINK_TAG=<think>

# Optional: global default end tag for stripping thought processes.
# A model-specific think_tag_end in LLM_PARAMS overrides this value.
# Defaults to '</think>' if not set.
THINK_END_TAG=</think>