Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions backend/app/services/collections/create_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CreationRequest,
)
from app.services.collections.helpers import (
batch_documents,
extract_error_message,
to_collection_public,
)
Expand Down Expand Up @@ -179,7 +180,13 @@ def execute_job(
)

storage = get_cloud_storage(session=session, project_id=project_id)
document_crud = DocumentCrud(session, project_id)

batch_size = creation_request.batch_size or 10
docs_batches = batch_documents(
DocumentCrud(session, project_id),
creation_request.documents,
batch_size,
)

provider = get_llm_provider(
session=session,
Expand All @@ -191,15 +198,14 @@ def execute_job(
result = provider.create(
collection_request=creation_request,
storage=storage,
document_crud=document_crud,
docs_batches=docs_batches,
)

llm_service_id = result.llm_service_id
llm_service_name = result.llm_service_name

with Session(engine) as session:
document_crud = DocumentCrud(session, project_id)
flat_docs = document_crud.read_each(creation_request.documents)
# Flatten the already-loaded batches — no need for a second DB read
flat_docs = [doc for batch in docs_batches for doc in batch]

file_exts = {doc.fname.split(".")[-1] for doc in flat_docs if "." in doc.fname}
file_sizes_kb = [
Expand Down
16 changes: 4 additions & 12 deletions backend/app/services/collections/providers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from openai import OpenAI

from app.services.collections.providers import BaseProvider
from app.crud import DocumentCrud
from app.core.cloud.storage import CloudStorage
from app.crud.rag import OpenAIVectorStoreCrud, OpenAIAssistantCrud
from app.services.collections.helpers import batch_documents, get_service_name
from app.models import CreationRequest, Collection
from app.services.collections.helpers import get_service_name
from app.models import CreationRequest, Collection, Document


logger = logging.getLogger(__name__)
Expand All @@ -24,20 +23,13 @@ def create(
self,
collection_request: CreationRequest,
storage: CloudStorage,
document_crud: DocumentCrud,
docs_batches: list[list[Document]],
) -> Collection:
"""
Create OpenAI vector store with documents and optionally an assistant.
docs_batches must be pre-fetched inside a DB session before this call.
"""
try:
# Use user-provided batch_size, default to 10 if not set
batch_size = collection_request.batch_size or 10
docs_batches = batch_documents(
document_crud,
collection_request.documents,
batch_size,
)

vector_store_crud = OpenAIVectorStoreCrud(self.client)
vector_store = vector_store_crud.create()

Expand Down
Loading