-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
63 lines (47 loc) · 2.11 KB
/
app.py
File metadata and controls
63 lines (47 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from fastapi import FastAPI, UploadFile, File, HTTPException, status, Form
from fastapi.responses import JSONResponse
import tempfile, os, logging
from src.utils import extract_text
# Logger obtained under the name "uvicorn.error"; the request handlers in this
# module use it to record failures via logger.exception().
logger = logging.getLogger("uvicorn.error")
# FastAPI application object; the @app.get / @app.post decorators in this
# module register their routes on it.
app = FastAPI()
@app.get("/")
def read_root():
    # Landing endpoint: answers GET / with a fixed JSON message that points
    # callers at the interactive documentation under /docs.
    status_message = "RAG API is running. Use /docs to interact with it. "
    return dict(message=status_message)
def _remove_temp_file(path: str) -> None:
    """Delete the temporary file at *path* on a best-effort basis.

    A file that is already gone is ignored. Any other OS-level failure is
    logged as a warning instead of being raised, so cleanup never replaces
    the real outcome of the request.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError:
        logger.warning("Could not remove temporary file %s", path, exc_info=True)


def _write_temp_pdf(data: bytes) -> str:
    """Write *data* to a new ``.pdf`` temporary file and return its path.

    The file is created with ``delete=False`` so it still exists after being
    closed; the caller is responsible for removing it. If the write itself
    fails, the partially written file is removed before the error propagates.
    """
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with tmp:
            tmp.write(data)
    except Exception:
        _remove_temp_file(tmp.name)
        raise
    return tmp.name


@app.post("/query_pdf", summary="Upload a PDF and ask your question.")
async def query_pdf(
    file: UploadFile = File(..., description="PDF file for knowledgebase."),
    question: str = Form(..., description="Your question about the PDF."),
):
    # Answer `question` using the text extracted from the uploaded PDF.
    #
    # Steps: validate the content type, persist the upload to a temporary
    # file, extract its text with extract_text.from_pdf, then run the question
    # through LocalRag. Returns {"answer": <result of rag.query>}.
    #
    # Raises HTTPException:
    #   400 - wrong content type, empty upload, or no extractable text.
    #   500 - the upload could not be read/saved, extraction failed, or the
    #         RAG query failed.
    #
    # Documented with comments rather than a docstring so the generated
    # OpenAPI description of this route stays unchanged.

    # Validate content type
    if file.content_type not in ("application/pdf", "application/octet-stream"):
        raise HTTPException(
            status_code=400,
            detail="Please upload a PDF (content-type: application/pdf).",
        )

    try:
        data = await file.read()
    except Exception as e:
        logger.exception("Failed to read uploaded file")
        raise HTTPException(status_code=500, detail="Failed to save uploaded file.") from e

    # Checked outside any try/except so this 400 is not caught and re-reported
    # as a 500, and before a temp file exists so nothing is left on disk.
    if not data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # Save the upload to a temp file
    try:
        pdf_path = _write_temp_pdf(data)
    except Exception as e:
        logger.exception("Failed to persist uploaded file")
        raise HTTPException(status_code=500, detail="Failed to save uploaded file.") from e

    # OCR/text extraction; the temp file is removed whether or not it succeeds.
    try:
        text = extract_text.from_pdf(pdf_path)
    except Exception as e:
        logger.exception("OCR/text extraction failed!")
        raise HTTPException(status_code=500, detail="Failed to extract text from PDF.") from e
    finally:
        _remove_temp_file(pdf_path)

    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="No text could be extracted from the PDF.")

    try:
        # Imported at call time, as in the original handler.
        from src.config import LocalRag

        rag = LocalRag(raw_text=text)
        answer = rag.query(question)
    except Exception as e:
        logger.exception("RAG pipeline failed!")
        # NOTE(review): this detail exposes the exception text to the client,
        # unlike the generic messages above - confirm that is intended.
        raise HTTPException(status_code=500, detail=f"Failed to run RAG query: {e}...") from e
    return {"answer": answer}