Skip to content

Commit d0ed82c

Browse files
committed
Enforce the Responses API 50 MB image limit
git-stack-id: fjord/original_image_res---4hw7wvctsczel1 git-stack-title: Enforce the Responses API 50 MB image limit
1 parent ab3e4c2 commit d0ed82c

File tree

2 files changed

+311
-3
lines changed

2 files changed

+311
-3
lines changed

codex-rs/core/src/client.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ impl ModelClientSession {
522522
summary: ReasoningSummaryConfig,
523523
) -> Result<ResponsesApiRequest> {
524524
let instructions = &prompt.base_instructions.text;
525-
let input = prompt.get_formatted_input();
525+
let input = prompt.get_formatted_input()?;
526526
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
527527
let default_reasoning_effort = model_info.default_reasoning_level;
528528
let reasoning = if model_info.supports_reasoning_summaries {

codex-rs/core/src/client_common.rs

Lines changed: 310 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
use crate::client_common::tools::ToolSpec;
22
use crate::config::types::Personality;
3+
use crate::error::CodexErr;
34
use crate::error::Result;
5+
use base64::Engine as _;
6+
use base64::prelude::BASE64_STANDARD;
47
pub use codex_api::common::ResponseEvent;
58
use codex_protocol::models::BaseInstructions;
9+
use codex_protocol::models::ContentItem;
610
use codex_protocol::models::FunctionCallOutputBody;
11+
use codex_protocol::models::FunctionCallOutputContentItem;
712
use codex_protocol::models::ResponseItem;
813
use futures::Stream;
914
use serde::Deserialize;
@@ -22,6 +27,12 @@ pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exi
2227
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
2328
include_str!("../templates/review/exit_interrupted.xml");
2429

30+
/// Upper bound, in bytes, on the inline image data allowed in a single Responses API
/// request.
///
/// See the Responses API image input size limits in the Images and Vision guide:
/// https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=file
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES: usize = 50_000_000;

/// Human-readable form of [`RESPONSES_API_MAX_INLINE_IMAGE_BYTES`] for user-facing messages.
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL: &str = "50 MB";

/// Text that takes the place of a tool-returned image dropped to keep a request within the
/// inline image limit.
const INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER: &str = "Codex omitted this tool-returned image because the current request would exceed the Responses API 50 MB total image limit. Request fewer images at a time or inspect them in smaller batches.";
35+
2536
/// API request payload for a single model turn
2637
#[derive(Default, Debug, Clone)]
2738
pub struct Prompt {
@@ -45,7 +56,7 @@ pub struct Prompt {
4556
}
4657

4758
impl Prompt {
48-
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
59+
pub(crate) fn get_formatted_input(&self) -> Result<Vec<ResponseItem>> {
4960
let mut input = self.input.clone();
5061

5162
// when using the *Freeform* apply_patch tool specifically, tool outputs
@@ -60,7 +71,156 @@ impl Prompt {
6071
reserialize_shell_outputs(&mut input);
6172
}
6273

63-
input
74+
enforce_inline_image_request_budget(&mut input, RESPONSES_API_MAX_INLINE_IMAGE_BYTES)?;
75+
76+
Ok(input)
77+
}
78+
}
79+
80+
fn enforce_inline_image_request_budget(
81+
items: &mut [ResponseItem],
82+
max_inline_image_bytes: usize,
83+
) -> Result<()> {
84+
let mut inline_image_bytes = total_inline_image_bytes(items);
85+
let mut omitted_model_generated_image = false;
86+
87+
if inline_image_bytes <= max_inline_image_bytes {
88+
return Ok(());
89+
}
90+
91+
for item in items.iter_mut().rev() {
92+
if inline_image_bytes <= max_inline_image_bytes {
93+
return Ok(());
94+
}
95+
96+
let Some(content_items) = tool_output_content_items_mut(item) else {
97+
continue;
98+
};
99+
100+
for content_item in content_items.iter_mut().rev() {
101+
if inline_image_bytes <= max_inline_image_bytes {
102+
return Ok(());
103+
}
104+
105+
let FunctionCallOutputContentItem::InputImage { image_url, .. } = content_item else {
106+
continue;
107+
};
108+
let Some(image_bytes) = inline_image_data_url_bytes(image_url) else {
109+
continue;
110+
};
111+
112+
*content_item = FunctionCallOutputContentItem::InputText {
113+
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
114+
};
115+
inline_image_bytes = inline_image_bytes.saturating_sub(image_bytes);
116+
omitted_model_generated_image = true;
117+
}
118+
}
119+
120+
Err(CodexErr::InvalidRequest(
121+
inline_image_request_budget_exceeded_message(
122+
inline_image_bytes,
123+
max_inline_image_bytes,
124+
omitted_model_generated_image,
125+
),
126+
))
127+
}
128+
129+
fn total_inline_image_bytes(items: &[ResponseItem]) -> usize {
130+
items
131+
.iter()
132+
.map(response_item_inline_image_bytes)
133+
.sum::<usize>()
134+
}
135+
136+
fn response_item_inline_image_bytes(item: &ResponseItem) -> usize {
137+
match item {
138+
ResponseItem::Message { content, .. } => content
139+
.iter()
140+
.filter_map(|content_item| match content_item {
141+
ContentItem::InputImage { image_url } => inline_image_data_url_bytes(image_url),
142+
ContentItem::InputText { .. } | ContentItem::OutputText { .. } => None,
143+
})
144+
.sum::<usize>(),
145+
ResponseItem::FunctionCallOutput { output, .. }
146+
| ResponseItem::CustomToolCallOutput { output, .. } => output
147+
.content_items()
148+
.map(|content_items| {
149+
content_items
150+
.iter()
151+
.filter_map(|content_item| match content_item {
152+
FunctionCallOutputContentItem::InputImage { image_url, .. } => {
153+
inline_image_data_url_bytes(image_url)
154+
}
155+
FunctionCallOutputContentItem::InputText { .. } => None,
156+
})
157+
.sum::<usize>()
158+
})
159+
.unwrap_or_default(),
160+
_ => 0,
161+
}
162+
}
163+
164+
fn tool_output_content_items_mut(
165+
item: &mut ResponseItem,
166+
) -> Option<&mut Vec<FunctionCallOutputContentItem>> {
167+
match item {
168+
ResponseItem::FunctionCallOutput { output, .. }
169+
| ResponseItem::CustomToolCallOutput { output, .. } => output.content_items_mut(),
170+
_ => None,
171+
}
172+
}
173+
174+
fn inline_image_data_url_bytes(url: &str) -> Option<usize> {
175+
let payload = parse_base64_image_data_url(url)?;
176+
Some(BASE64_STANDARD.decode(payload).ok()?.len())
177+
}
178+
179+
/// Extracts the payload of a `data:image/...;base64,<payload>` URL.
///
/// The scheme, the `image/` media-type prefix and the `base64` marker are all matched
/// ASCII case-insensitively. The payload is everything after the first comma and is
/// returned unvalidated. Returns `None` when the scheme is not `data:`, there is no
/// comma, the media type does not start with `image/`, or no `base64` parameter is
/// present.
fn parse_base64_image_data_url(url: &str) -> Option<&str> {
    const SCHEME: &str = "data:";
    const IMAGE_PREFIX: &str = "image/";

    // `str::get` keeps the prefix probe panic-free on short or non-ASCII input.
    let has_prefix_ignore_case = |text: &str, prefix: &str| {
        text.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    };

    if !has_prefix_ignore_case(url, SCHEME) {
        return None;
    }

    let (metadata, payload) = url.split_once(',')?;

    // The scheme contains no comma, so `metadata` is at least as long as the scheme.
    let mut parameters = metadata[SCHEME.len()..].split(';');
    let mime_type = parameters.next().unwrap_or_default();
    if !has_prefix_ignore_case(mime_type, IMAGE_PREFIX) {
        return None;
    }

    parameters
        .any(|parameter| parameter.eq_ignore_ascii_case("base64"))
        .then_some(payload)
}
204+
205+
fn inline_image_request_budget_exceeded_message(
206+
inline_image_bytes: usize,
207+
max_inline_image_bytes: usize,
208+
omitted_model_generated_image: bool,
209+
) -> String {
210+
let limit_label = if max_inline_image_bytes == RESPONSES_API_MAX_INLINE_IMAGE_BYTES {
211+
RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL.to_string()
212+
} else {
213+
format!("{max_inline_image_bytes} bytes")
214+
};
215+
216+
if omitted_model_generated_image {
217+
format!(
218+
"Codex could not send this turn because inline images still total {inline_image_bytes} bytes after omitting all model-generated tool images, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
219+
)
220+
} else {
221+
format!(
222+
"Codex could not send this turn because inline images total {inline_image_bytes} bytes, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
223+
)
64224
}
65225
}
66226

@@ -230,10 +390,14 @@ impl Stream for ResponseStream {
230390

231391
#[cfg(test)]
232392
mod tests {
393+
use base64::Engine as _;
394+
use base64::prelude::BASE64_STANDARD;
233395
use codex_api::ResponsesApiRequest;
234396
use codex_api::common::OpenAiVerbosity;
235397
use codex_api::common::TextControls;
236398
use codex_api::create_text_param_for_request;
399+
use codex_protocol::models::ContentItem;
400+
use codex_protocol::models::FunctionCallOutputContentItem;
237401
use codex_protocol::models::FunctionCallOutputPayload;
238402
use pretty_assertions::assert_eq;
239403

@@ -396,4 +560,148 @@ mod tests {
396560
]
397561
);
398562
}
563+
564+
/// With three 4-byte images and an 8-byte budget, only the newest tool image is replaced
/// by the placeholder; the user image and the older tool image are left untouched.
#[test]
fn rewrites_newest_tool_images_until_request_is_within_budget() {
    let user_message = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputImage {
            image_url: image_data_url(&[1, 2, 3, 4]),
        }],
        end_turn: None,
        phase: None,
    };
    let older_tool_output = ResponseItem::FunctionCallOutput {
        call_id: "call-1".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: image_data_url(&[5, 6, 7, 8]),
                detail: None,
            },
        ]),
    };
    let newest_tool_output = ResponseItem::CustomToolCallOutput {
        call_id: "call-2".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: image_data_url(&[9, 10, 11, 12]),
                detail: None,
            },
        ]),
    };
    let mut items = vec![
        user_message.clone(),
        older_tool_output.clone(),
        newest_tool_output,
    ];

    enforce_inline_image_request_budget(&mut items, 8).expect("request should fit");

    let expected = vec![
        user_message,
        older_tool_output,
        ResponseItem::CustomToolCallOutput {
            call_id: "call-2".to_string(),
            output: FunctionCallOutputPayload::from_content_items(vec![
                FunctionCallOutputContentItem::InputText {
                    text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
                },
            ]),
        },
    ];
    assert_eq!(items, expected);
}
630+
631+
/// A single 4-byte user image against a 3-byte budget cannot be trimmed, so the call
/// fails with the "nothing omitted" wording and a raw byte limit.
#[test]
fn errors_when_user_images_still_exceed_request_budget() {
    let user_image = ContentItem::InputImage {
        image_url: image_data_url(&[1, 2, 3, 4]),
    };
    let mut items = vec![ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![user_image],
        end_turn: None,
        phase: None,
    }];

    let message = enforce_inline_image_request_budget(&mut items, 3)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        message,
        "Codex could not send this turn because inline images total 4 bytes, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
    );
}
650+
651+
/// When the user image alone exceeds the budget, the tool image is still replaced by the
/// placeholder, the call fails with the "after omitting" wording, and the rewritten
/// items are left in place.
#[test]
fn errors_after_omitting_tool_images_if_user_images_still_exceed_budget() {
    let user_message = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputImage {
            image_url: image_data_url(&[1, 2, 3, 4]),
        }],
        end_turn: None,
        phase: None,
    };
    let tool_output = ResponseItem::FunctionCallOutput {
        call_id: "call-1".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: image_data_url(&[5, 6, 7, 8]),
                detail: None,
            },
        ]),
    };
    let mut items = vec![user_message.clone(), tool_output];

    let message = enforce_inline_image_request_budget(&mut items, 3)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        message,
        "Codex could not send this turn because inline images still total 4 bytes after omitting all model-generated tool images, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
    );

    let expected = vec![
        user_message,
        ResponseItem::FunctionCallOutput {
            call_id: "call-1".to_string(),
            output: FunctionCallOutputPayload::from_content_items(vec![
                FunctionCallOutputContentItem::InputText {
                    text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
                },
            ]),
        },
    ];
    assert_eq!(items, expected);
}
703+
704+
fn image_data_url(bytes: &[u8]) -> String {
705+
format!("data:image/png;base64,{}", BASE64_STANDARD.encode(bytes))
706+
}
399707
}

0 commit comments

Comments
 (0)