From a0732ee0c0cfb015c2adeb3985b2c6092d46cad6 Mon Sep 17 00:00:00 2001 From: Prajna1999 Date: Fri, 10 Apr 2026 08:56:58 +0530 Subject: [PATCH] refactor: smol moving around db/network calls --- .../services/collections/create_collection.py | 16 +++++++++++----- .../app/services/collections/providers/openai.py | 16 ++++------------ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/backend/app/services/collections/create_collection.py b/backend/app/services/collections/create_collection.py index b69154ac4..397e4eb3c 100644 --- a/backend/app/services/collections/create_collection.py +++ b/backend/app/services/collections/create_collection.py @@ -23,6 +23,7 @@ CreationRequest, ) from app.services.collections.helpers import ( + batch_documents, extract_error_message, to_collection_public, ) @@ -179,7 +180,13 @@ def execute_job( ) storage = get_cloud_storage(session=session, project_id=project_id) - document_crud = DocumentCrud(session, project_id) + + batch_size = creation_request.batch_size or 10 + docs_batches = batch_documents( + DocumentCrud(session, project_id), + creation_request.documents, + batch_size, + ) provider = get_llm_provider( session=session, @@ -191,15 +198,14 @@ def execute_job( result = provider.create( collection_request=creation_request, storage=storage, - document_crud=document_crud, + docs_batches=docs_batches, ) llm_service_id = result.llm_service_id llm_service_name = result.llm_service_name - with Session(engine) as session: - document_crud = DocumentCrud(session, project_id) - flat_docs = document_crud.read_each(creation_request.documents) + # Flatten the already-loaded batches — no need for a second DB read + flat_docs = [doc for batch in docs_batches for doc in batch] file_exts = {doc.fname.split(".")[-1] for doc in flat_docs if "." in doc.fname} file_sizes_kb = [ diff --git a/backend/app/services/collections/providers/openai.py b/backend/app/services/collections/providers/openai.py index bdab86d3a..9dfaab31c 100644 --- a/backend/app/services/collections/providers/openai.py +++ b/backend/app/services/collections/providers/openai.py @@ -3,11 +3,10 @@ from openai import OpenAI from app.services.collections.providers import BaseProvider -from app.crud import DocumentCrud from app.core.cloud.storage import CloudStorage from app.crud.rag import OpenAIVectorStoreCrud, OpenAIAssistantCrud -from app.services.collections.helpers import batch_documents, get_service_name -from app.models import CreationRequest, Collection +from app.services.collections.helpers import get_service_name +from app.models import CreationRequest, Collection, Document logger = logging.getLogger(__name__) @@ -24,20 +23,13 @@ def create( self, collection_request: CreationRequest, storage: CloudStorage, - document_crud: DocumentCrud, + docs_batches: list[list[Document]], ) -> Collection: """ Create OpenAI vector store with documents and optionally an assistant. + docs_batches must be pre-fetched inside a DB session before this call. """ try: - # Use user-provided batch_size, default to 10 if not set - batch_size = collection_request.batch_size or 10 - docs_batches = batch_documents( - document_crud, - collection_request.documents, - batch_size, - ) - vector_store_crud = OpenAIVectorStoreCrud(self.client) vector_store = vector_store_crud.create()