11import inspect
22import logging
3+ import time
34import warnings
45from uuid import UUID , uuid4
56from typing import Any , List , Optional
1112from pydantic import BaseModel , Field , HttpUrl
1213from sqlalchemy .exc import NoResultFound , MultipleResultsFound , SQLAlchemyError
1314
14- from app .api .deps import CurrentUser , SessionDep
15+ from app .api .deps import CurrentUser , SessionDep , CurrentUserOrgProject
1516from app .core .cloud import AmazonCloudStorage
1617from app .core .config import settings
1718from app .core .util import now , raise_from_unknown , post_callback
1819from app .crud import DocumentCrud , CollectionCrud , DocumentCollectionCrud
1920from app .crud .rag import OpenAIVectorStoreCrud , OpenAIAssistantCrud
2021from app .models import Collection , Document
22+ from app .models .collection import CollectionStatus
2123from app .utils import APIResponse , load_description
2224
2325router = APIRouter (prefix = "/collections" , tags = ["collections" ])
@@ -173,61 +175,77 @@ def do_create_collection(
173175 request : CreationRequest ,
174176 payload : ResponsePayload ,
175177):
178+ start_time = time .time ()
176179 client = OpenAI (api_key = settings .OPENAI_API_KEY )
177- if request .callback_url is None :
178- callback = SilentCallback (payload )
179- else :
180- callback = WebHookCallback (request .callback_url , payload )
181-
182- #
183- # Create the assistant and vector store
184- #
185-
186- vector_store_crud = OpenAIVectorStoreCrud (client )
187- try :
188- vector_store = vector_store_crud .create ()
189- except OpenAIError as err :
190- callback .fail (str (err ))
191- return
180+ callback = (
181+ SilentCallback (payload )
182+ if request .callback_url is None
183+ else WebHookCallback (request .callback_url , payload )
184+ )
192185
193186 storage = AmazonCloudStorage (current_user )
194187 document_crud = DocumentCrud (session , current_user .id )
195188 assistant_crud = OpenAIAssistantCrud (client )
189+ vector_store_crud = OpenAIVectorStoreCrud (client )
190+ collection_crud = CollectionCrud (session , current_user .id )
196191
197- docs = request (document_crud )
198- kwargs = dict (request .extract_super_type (AssistantOptions ))
199192 try :
200- updates = vector_store_crud .update (vector_store .id , storage , docs )
201- documents = list (updates )
202- assistant = assistant_crud .create (vector_store .id , ** kwargs )
203- except Exception as err : # blanket to handle SQL and OpenAI errors
204- logging .error (f"File Search setup error: { err } ({ type (err ).__name__ } )" )
205- vector_store_crud .delete (vector_store .id )
206- callback .fail (str (err ))
207- return
193+ vector_store = vector_store_crud .create ()
208194
209- #
210- # Store the results
211- #
195+ docs = list (request (document_crud ))
196+ flat_docs = [doc for sublist in docs for doc in sublist ]
212197
213- collection_crud = CollectionCrud (session , current_user .id )
214- collection = Collection (
215- id = UUID (payload .key ),
216- llm_service_id = assistant .id ,
217- llm_service_name = request .model ,
218- )
219- try :
220- collection_crud .create (collection , documents )
221- except SQLAlchemyError as err :
222- _backout (assistant_crud , assistant .id )
223- callback .fail (str (err ))
224- return
198+ file_exts = {doc .fname .split ("." )[- 1 ] for doc in flat_docs if "." in doc .fname }
199+ file_sizes_kb = [
200+ storage .get_file_size_kb (doc .object_store_url ) for doc in flat_docs
201+ ]
202+
203+ logging .info (
204+ f"[VectorStore Update] Uploading { len (flat_docs )} documents to vector store { vector_store .id } "
205+ )
206+ list (vector_store_crud .update (vector_store .id , storage , docs ))
207+ logging .info (f"[VectorStore Upload] Upload completed" )
208+
209+ assistant_options = dict (request .extract_super_type (AssistantOptions ))
210+ logging .info (
211+ f"[Assistant Create] Creating assistant with options: { assistant_options } "
212+ )
213+ assistant = assistant_crud .create (vector_store .id , ** assistant_options )
214+ logging .info (f"[Assistant Create] Assistant created: { assistant .id } " )
215+
216+ collection = collection_crud .read_one (UUID (payload .key ))
217+ collection .llm_service_id = assistant .id
218+ collection .llm_service_name = request .model
219+ collection .status = CollectionStatus .successful
220+ collection .updated_at = now ()
221+
222+ if flat_docs :
223+ logging .info (
224+ f"[DocumentCollection] Linking { len (flat_docs )} documents to collection { collection .id } "
225+ )
226+ DocumentCollectionCrud (session ).create (collection , flat_docs )
227+
228+ collection_crud ._update (collection )
225229
226- #
227- # Send back successful response
228- #
230+ elapsed = time .time () - start_time
231+ logging .info (
232+ f"Collection created: { collection .id } | Time: { elapsed :.2f} s | "
233+ f"Files: { len (flat_docs )} | Sizes: { file_sizes_kb } KB | Types: { list (file_exts )} "
234+ )
235+ callback .success (collection .model_dump (mode = "json" ))
229236
230- callback .success (collection .model_dump (mode = "json" ))
237+ except Exception as err :
238+ logging .error (f"[Collection Creation Failed] { err } ({ type (err ).__name__ } )" )
239+ if "assistant" in locals ():
240+ _backout (assistant_crud , assistant .id )
241+ try :
242+ collection = collection_crud .read_one (UUID (payload .key ))
243+ collection .status = CollectionStatus .failed
244+ collection .updated_at = now ()
245+ collection_crud ._update (collection )
246+ except Exception as suberr :
247+ logging .warning (f"[Collection Status Update Failed] { suberr } " )
248+ callback .fail (str (err ))
231249
232250
233251@router .post (
@@ -236,14 +254,26 @@ def do_create_collection(
236254)
237255def create_collection (
238256 session : SessionDep ,
239- current_user : CurrentUser ,
257+ current_user : CurrentUserOrgProject ,
240258 request : CreationRequest ,
241259 background_tasks : BackgroundTasks ,
242260):
243261 this = inspect .currentframe ()
244262 route = router .url_path_for (this .f_code .co_name )
245263 payload = ResponsePayload ("processing" , route )
246264
265+ collection = Collection (
266+ id = UUID (payload .key ),
267+ owner_id = current_user .id ,
268+ organization_id = current_user .organization_id ,
269+ project_id = current_user .project_id ,
270+ status = CollectionStatus .processing ,
271+ )
272+
273+ collection_crud = CollectionCrud (session , current_user .id )
274+ collection_crud .create (collection )
275+
276+ # 2. Launch background task
247277 background_tasks .add_task (
248278 do_create_collection ,
249279 session ,
0 commit comments