-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsemango.yml
More file actions
94 lines (84 loc) · 1.99 KB
/
semango.yml
File metadata and controls
94 lines (84 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Semango configuration file
# Embedding settings
embedding:
  # Provider for embeddings: local, openai
  provider: local
  # Model name for the selected provider (Hugging Face ID or direct path for
  # the local provider)
  model: onnx-models/all-MiniLM-L6-v2-onnx
  # Optional ONNX output name to use when it is not recognized by semango
  # (for example: sentence_embedding).
  # onnx_output_name: "[optional_pooling_layer]"
  batch_size: 48
  concurrent: 4
  # Use GPU acceleration for local embeddings if available (CUDA); defaults
  # to true.
  # gpu: true
  # Directory where models are cached.
  # NOTE(review): `${VAR:=default}` looks like shell-style default expansion
  # of SEMANGO_MODEL_DIR; confirm how semango expands it.
  model_cache_dir: "${SEMANGO_MODEL_DIR:=~/.cache/semango}"
  # For openai provider:
  # api_key: "your-api-key"
  # api_key_env: "OPENAI_API_KEY"
  # base_url: "https://api.openai.com/v1"
  # base_url_env: "OPENAI_BASE_URL"
# Lexical search settings (BM25)
lexical:
  enabled: true
  # Location of the lexical index on disk (relative path).
  index_path: ./semango/index/bleve
  # BM25 tuning parameters; k1 and b are the standard BM25 knobs
  # (term-frequency saturation and document-length normalization).
  bm25_k1: 1.2
  bm25_b: 0.75
# Reranker settings (commented out; uncomment the whole block to use it)
# reranker:
#   enabled: false
#   provider: openai
#   model: rerank-english-v3.0
#   batch_size: 32
#   per_request_override: true
#   # For openai provider:
#   # api_key: "your-api-key"
#   # api_key_env: "OPENAI_API_KEY"
#   # base_url: "https://api.openai.com/v1"
#   # base_url_env: "OPENAI_BASE_URL"
# Hybrid search merging settings
hybrid:
  # Relative weights of the vector and lexical scores; the values below
  # sum to 1.0.
  vector_weight: 0.7
  lexical_weight: 0.3
  # Strategy used to merge the two result sets.
  fusion: linear
# File indexing settings
files:
  # Glob patterns of files to index. The patterns are quoted because a
  # plain scalar starting with `*` would be parsed as a YAML alias.
  include:
    - "**/*.md"
    - "**/*.go"
    - "**/*.{png,jpg,jpeg}"
    - "**/*.pdf"
    - "**/*.csv"
    - "**/*.json"
    - "**/*.jsonl"
    - "**/*.parquet"
  # Glob patterns of paths to skip.
  exclude:
    - ".git/**"
    - "node_modules/**"
    - "vendor/**"
  # Chunking parameters.
  # NOTE(review): units (characters vs. tokens) are not stated here; confirm.
  chunk_size: 1000
  chunk_overlap: 200
# Server settings
server:
  # 0.0.0.0 listens on all network interfaces.
  host: 0.0.0.0
  port: 8181
  auth:
    type: token
    # Name of the environment variable consulted for tokens.
    # NOTE(review): presumably holds the accepted token(s); confirm format.
    token_env: SEMANGO_TOKENS
  # TLS certificate and key paths; both are empty strings here.
  # NOTE(review): presumably empty means TLS is disabled; confirm.
  tls_cert: ""
  tls_key: ""
# Plugin settings
# Each entry is a path: a directory (plugins/) or a single shared object
# (../shared/my_custom.so).
plugins:
  - plugins/
  - ../shared/my_custom.so
# Web UI settings
ui:
  enabled: true
# Model Context Protocol (MCP) settings
mcp:
  enabled: true
# Tabular data settings
tabular:
  max_rows_embedded: 1000
  sampling: random
  min_text_tokens: 5
  # Explicit empty string rather than a bare `delimiter:` (which would be
  # null).
  # NOTE(review): presumably empty means the delimiter is auto-detected;
  # confirm.
  delimiter: ""