From 09befca7225bcee94adf854239c551413104c489 Mon Sep 17 00:00:00 2001
From: NIK-TIGER-BILL
Date: Wed, 18 Mar 2026 07:22:00 +0000
Subject: [PATCH] fix(files): stream content in 1 MB chunks to prevent
 ConnectionResetError on large files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #2959

Batch API result files larger than ~200 MB caused ConnectionResetError
because HttpxBinaryResponseContent.content read the entire HTTP body at
once via response.content. Long-lived connections get reset by the
server before a single large read completes.

Fix: replace response.content with a loop over response.iter_bytes()
using 1 MB chunks. The return type and semantics are unchanged — the
property still returns bytes — but the data is read incrementally,
keeping the connection alive throughout the download.
---
 src/openai/_legacy_response.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/openai/_legacy_response.py b/src/openai/_legacy_response.py
index 1a58c2dfc3..f8b2dcb44b 100644
--- a/src/openai/_legacy_response.py
+++ b/src/openai/_legacy_response.py
@@ -394,7 +394,17 @@ def __init__(self, response: httpx.Response) -> None:
 
     @property
     def content(self) -> bytes:
-        return self.response.content
+        """Return the response content, streamed in chunks to avoid ConnectionResetError on large files.
+
+        Streaming in 1 MB chunks prevents issues with large Batch API result files (>200 MB)
+        where reading the entire body at once can trigger a server-side connection reset
+        on long-lived HTTP connections. Fixes #2959.
+        """
+        _CHUNK_SIZE = 1024 * 1024  # 1 MB
+        buf = bytearray()
+        for chunk in self.response.iter_bytes(chunk_size=_CHUNK_SIZE):
+            buf.extend(chunk)
+        return bytes(buf)
 
     @property
     def text(self) -> str: