forked from endee-io/endee
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathretriever.py
More file actions
95 lines (80 loc) · 3.23 KB
/
retriever.py
File metadata and controls
95 lines (80 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
retriever.py
Semantic retrieval from Endee + optional RAG answer generation
"""
import os
from typing import List, Dict, Any, Optional, Tuple
from endee import Endee
from embedder import Embedder
# Name of the Endee index that Retriever queries.
INDEX_NAME = "industrial_docs"
# Default server address; Retriever appends "/api/v1" to it.
ENDEE_HOST = "http://localhost:8080"


class Retriever:
    """Embed a text query and fetch the nearest documents from an Endee index."""

    def __init__(self, host: str = ENDEE_HOST, auth_token: str = ""):
        """Create the Endee client and the query embedder.

        Args:
            host: Server base address; ``/api/v1`` is appended to it.
            auth_token: Token handed to ``Endee``. When empty, the client is
                constructed without one.
        """
        self.client = Endee(auth_token) if auth_token else Endee()
        # Strip trailing slashes so the joined URL never contains "//api/v1".
        self.client.set_base_url(f"{host.rstrip('/')}/api/v1")
        self.embedder = Embedder()
        self.index_name = INDEX_NAME

    def search(
        self,
        query: str,
        top_k: int = 5,
        category_filter: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Return the hits for *query* as plain dictionaries.

        Args:
            query: Text to embed and search for.
            top_k: Number of hits requested from the index.
            category_filter: When given (and non-empty), only hits whose
                ``meta["category"]`` equals this value are returned.

        Returns:
            A list of dicts with the keys ``id``, ``score`` (similarity
            rounded to 4 decimals), ``title``, ``category`` and ``content``,
            in the order the index returned them.
        """
        query_vec = self.embedder.embed_single(query)
        index = self.client.get_index(name=self.index_name)
        results = index.query(
            vector=query_vec,
            top_k=top_k,
        )

        formatted = [self._format_hit(hit) for hit in results or []]

        if category_filter:
            # The filter is applied after the query, so fewer than top_k
            # documents may come back when some hits are in other categories.
            formatted = [
                doc for doc in formatted if doc["category"] == category_filter
            ]
        return formatted

    @staticmethod
    def _format_hit(hit: Any) -> Dict[str, Any]:
        """Normalize one hit (a dict or an attribute-style object) to a dict."""
        if isinstance(hit, dict):
            read = hit.get
        else:
            def read(key: str, default: Any = None) -> Any:
                return getattr(hit, key, default)

        # "meta" may be absent or explicitly None for either hit shape.
        meta = read("meta") or {}
        return {
            "id": read("id", ""),
            # A missing or None similarity is reported as 0.
            "score": round(float(read("similarity", 0) or 0), 4),
            "title": meta.get("title", ""),
            "category": meta.get("category", ""),
            "content": meta.get("content", ""),
        }
class RAGAnswerGenerator:
    """Answer a question from retrieved documents.

    Uses Google Gemini when the ``GOOGLE_API_KEY`` environment variable is set
    and the ``google.generativeai`` package can be imported; otherwise falls
    back to quoting the start of the top-ranked document.
    """

    # Maximum number of characters quoted from the top document in fallback mode.
    _SNIPPET_CHARS = 500

    def __init__(self, model_name: str = "gemini-1.5-flash"):
        """Configure Gemini if an API key and the SDK are both available.

        Args:
            model_name: Name passed to ``genai.GenerativeModel``.
        """
        self.gemini_available = False
        # Always defined, so the attribute exists even in fallback mode.
        self.model = None
        api_key = os.getenv("GOOGLE_API_KEY", "")
        if api_key:
            try:
                import google.generativeai as genai
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel(model_name)
                self.gemini_available = True
            except ImportError:
                # SDK not installed: deliberately stay in fallback mode.
                pass

    def generate(self, question: str, context_docs: List[Dict[str, Any]]) -> Tuple[str, bool]:
        """Produce an answer for *question* from *context_docs*.

        Args:
            question: The user's question.
            context_docs: Documents carrying ``title`` and ``content`` keys;
                a missing key is treated as an empty string.

        Returns:
            ``(answer, used_llm)``. ``used_llm`` is True only when the text
            came from Gemini. Errors raised by the Gemini call propagate.
        """
        if not context_docs:
            return "No relevant documents found.", False

        if self.gemini_available:
            context_text = "\n\n".join(
                f"[{pos}] {doc.get('title', '')}\n{doc.get('content', '')}"
                for pos, doc in enumerate(context_docs, start=1)
            )
            prompt = (
                "You are an industrial safety expert assistant.\n"
                "Use ONLY the context below to answer the question.\n\n"
                f"CONTEXT:\n{context_text}\n\n"
                f"QUESTION: {question}\n\nANSWER:"
            )
            response = self.model.generate_content(prompt)
            return response.text.strip(), True

        top = context_docs[0]
        content = top.get("content", "")
        snippet = content[: self._SNIPPET_CHARS]
        if len(content) > self._SNIPPET_CHARS:
            # Only mark truncation when text was actually cut off.
            snippet += "..."
        return (
            f"**Top relevant document:** {top.get('title', '')}\n\n"
            f"{snippet}\n\n"
            f"*(Set Google Gemini API key for AI-generated answers)*",
            False,
        )