11use crate :: client_common:: tools:: ToolSpec ;
22use crate :: config:: types:: Personality ;
3+ use crate :: error:: CodexErr ;
34use crate :: error:: Result ;
5+ use base64:: Engine as _;
6+ use base64:: prelude:: BASE64_STANDARD ;
47pub use codex_api:: common:: ResponseEvent ;
58use codex_protocol:: models:: BaseInstructions ;
9+ use codex_protocol:: models:: ContentItem ;
610use codex_protocol:: models:: FunctionCallOutputBody ;
11+ use codex_protocol:: models:: FunctionCallOutputContentItem ;
712use codex_protocol:: models:: ResponseItem ;
813use futures:: Stream ;
914use serde:: Deserialize ;
@@ -22,6 +27,12 @@ pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exi
/// Review-exit template for the interrupted case, embedded at compile time from
/// `../templates/review/exit_interrupted.xml`.
2227pub const REVIEW_EXIT_INTERRUPTED_TMPL : & str =
2328 include_str ! ( "../templates/review/exit_interrupted.xml" ) ;
2429
30+ // See the Responses API image input size limits in the Images and Vision guide:
31+ // https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=file
// Byte budget that `Prompt::get_formatted_input` enforces on the inline images of one request.
32+ const RESPONSES_API_MAX_INLINE_IMAGE_BYTES : usize = 50_000_000 ;
// Human-readable form of the limit above, used when building the budget-exceeded error message.
33+ const RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL : & str = "50 MB" ;
// Text that replaces a tool-returned image when it is dropped to fit the budget.
34+ const INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER : & str = "Codex omitted this tool-returned image because the current request would exceed the Responses API 50 MB total image limit. Request fewer images at a time or inspect them in smaller batches." ;
35+
2536/// API request payload for a single model turn
2637#[ derive( Default , Debug , Clone ) ]
2738pub struct Prompt {
@@ -45,7 +56,7 @@ pub struct Prompt {
4556}
4657
4758impl Prompt {
48- pub ( crate ) fn get_formatted_input ( & self ) -> Vec < ResponseItem > {
59+ pub ( crate ) fn get_formatted_input ( & self ) -> Result < Vec < ResponseItem > > {
4960 let mut input = self . input . clone ( ) ;
5061
5162 // when using the *Freeform* apply_patch tool specifically, tool outputs
@@ -60,7 +71,156 @@ impl Prompt {
6071 reserialize_shell_outputs ( & mut input) ;
6172 }
6273
63- input
74+ enforce_inline_image_request_budget ( & mut input, RESPONSES_API_MAX_INLINE_IMAGE_BYTES ) ?;
75+
76+ Ok ( input)
77+ }
78+ }
79+
80+ fn enforce_inline_image_request_budget (
81+ items : & mut [ ResponseItem ] ,
82+ max_inline_image_bytes : usize ,
83+ ) -> Result < ( ) > {
84+ let mut inline_image_bytes = total_inline_image_bytes ( items) ;
85+ let mut omitted_model_generated_image = false ;
86+
87+ if inline_image_bytes <= max_inline_image_bytes {
88+ return Ok ( ( ) ) ;
89+ }
90+
91+ for item in items. iter_mut ( ) . rev ( ) {
92+ if inline_image_bytes <= max_inline_image_bytes {
93+ return Ok ( ( ) ) ;
94+ }
95+
96+ let Some ( content_items) = tool_output_content_items_mut ( item) else {
97+ continue ;
98+ } ;
99+
100+ for content_item in content_items. iter_mut ( ) . rev ( ) {
101+ if inline_image_bytes <= max_inline_image_bytes {
102+ return Ok ( ( ) ) ;
103+ }
104+
105+ let FunctionCallOutputContentItem :: InputImage { image_url, .. } = content_item else {
106+ continue ;
107+ } ;
108+ let Some ( image_bytes) = inline_image_data_url_bytes ( image_url) else {
109+ continue ;
110+ } ;
111+
112+ * content_item = FunctionCallOutputContentItem :: InputText {
113+ text : INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER . to_string ( ) ,
114+ } ;
115+ inline_image_bytes = inline_image_bytes. saturating_sub ( image_bytes) ;
116+ omitted_model_generated_image = true ;
117+ }
118+ }
119+
120+ Err ( CodexErr :: InvalidRequest (
121+ inline_image_request_budget_exceeded_message (
122+ inline_image_bytes,
123+ max_inline_image_bytes,
124+ omitted_model_generated_image,
125+ ) ,
126+ ) )
127+ }
128+
129+ fn total_inline_image_bytes ( items : & [ ResponseItem ] ) -> usize {
130+ items
131+ . iter ( )
132+ . map ( response_item_inline_image_bytes)
133+ . sum :: < usize > ( )
134+ }
135+
136+ fn response_item_inline_image_bytes ( item : & ResponseItem ) -> usize {
137+ match item {
138+ ResponseItem :: Message { content, .. } => content
139+ . iter ( )
140+ . filter_map ( |content_item| match content_item {
141+ ContentItem :: InputImage { image_url } => inline_image_data_url_bytes ( image_url) ,
142+ ContentItem :: InputText { .. } | ContentItem :: OutputText { .. } => None ,
143+ } )
144+ . sum :: < usize > ( ) ,
145+ ResponseItem :: FunctionCallOutput { output, .. }
146+ | ResponseItem :: CustomToolCallOutput { output, .. } => output
147+ . content_items ( )
148+ . map ( |content_items| {
149+ content_items
150+ . iter ( )
151+ . filter_map ( |content_item| match content_item {
152+ FunctionCallOutputContentItem :: InputImage { image_url, .. } => {
153+ inline_image_data_url_bytes ( image_url)
154+ }
155+ FunctionCallOutputContentItem :: InputText { .. } => None ,
156+ } )
157+ . sum :: < usize > ( )
158+ } )
159+ . unwrap_or_default ( ) ,
160+ _ => 0 ,
161+ }
162+ }
163+
164+ fn tool_output_content_items_mut (
165+ item : & mut ResponseItem ,
166+ ) -> Option < & mut Vec < FunctionCallOutputContentItem > > {
167+ match item {
168+ ResponseItem :: FunctionCallOutput { output, .. }
169+ | ResponseItem :: CustomToolCallOutput { output, .. } => output. content_items_mut ( ) ,
170+ _ => None ,
171+ }
172+ }
173+
174+ fn inline_image_data_url_bytes ( url : & str ) -> Option < usize > {
175+ let payload = parse_base64_image_data_url ( url) ?;
176+ Some ( BASE64_STANDARD . decode ( payload) . ok ( ) ?. len ( ) )
177+ }
178+
/// Extracts the payload of a `data:image/...;base64,<payload>` URL.
///
/// The scheme, the MIME type prefix and the `base64` marker are all matched
/// ASCII case-insensitively. The payload is everything after the first comma
/// and is returned as-is, without being validated or decoded.
///
/// Returns `None` when the URL does not start with `data:`, has no comma, has a
/// MIME type that does not start with `image/`, or carries no `base64`
/// parameter after the MIME type.
fn parse_base64_image_data_url(url: &str) -> Option<&str> {
    const SCHEME: &str = "data:";
    const IMAGE_MIME_PREFIX: &str = "image/";

    /// ASCII case-insensitive `starts_with`; `get` keeps it panic-free when the
    /// prefix length falls inside a multi-byte character.
    fn starts_with_ignore_ascii_case(text: &str, prefix: &str) -> bool {
        text.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    }

    if !starts_with_ignore_ascii_case(url, SCHEME) {
        return None;
    }

    let (metadata, payload) = url.split_once(',')?;

    // The scheme contains no comma, so `metadata` is at least `SCHEME.len()` long.
    let mut parameters = metadata[SCHEME.len()..].split(';');
    let mime_type = parameters.next().unwrap_or_default();
    let is_base64 = parameters.any(|parameter| parameter.eq_ignore_ascii_case("base64"));
    let is_image = starts_with_ignore_ascii_case(mime_type, IMAGE_MIME_PREFIX);

    (is_image && is_base64).then_some(payload)
}
204+
205+ fn inline_image_request_budget_exceeded_message (
206+ inline_image_bytes : usize ,
207+ max_inline_image_bytes : usize ,
208+ omitted_model_generated_image : bool ,
209+ ) -> String {
210+ let limit_label = if max_inline_image_bytes == RESPONSES_API_MAX_INLINE_IMAGE_BYTES {
211+ RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL . to_string ( )
212+ } else {
213+ format ! ( "{max_inline_image_bytes} bytes" )
214+ } ;
215+
216+ if omitted_model_generated_image {
217+ format ! (
218+ "Codex could not send this turn because inline images still total {inline_image_bytes} bytes after omitting all model-generated tool images, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
219+ )
220+ } else {
221+ format ! (
222+ "Codex could not send this turn because inline images total {inline_image_bytes} bytes, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
223+ )
64224 }
65225}
66226
@@ -230,10 +390,14 @@ impl Stream for ResponseStream {
230390
231391#[ cfg( test) ]
232392mod tests {
393+ use base64:: Engine as _;
394+ use base64:: prelude:: BASE64_STANDARD ;
233395 use codex_api:: ResponsesApiRequest ;
234396 use codex_api:: common:: OpenAiVerbosity ;
235397 use codex_api:: common:: TextControls ;
236398 use codex_api:: create_text_param_for_request;
399+ use codex_protocol:: models:: ContentItem ;
400+ use codex_protocol:: models:: FunctionCallOutputContentItem ;
237401 use codex_protocol:: models:: FunctionCallOutputPayload ;
238402 use pretty_assertions:: assert_eq;
239403
@@ -396,4 +560,148 @@ mod tests {
396560 ]
397561 ) ;
398562 }
563+
#[test]
fn rewrites_newest_tool_images_until_request_is_within_budget() {
    // One user image followed by two tool outputs, each holding a 4-byte image:
    // 12 bytes in total against a budget of 8.
    let user_message = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputImage {
            image_url: image_data_url(&[1, 2, 3, 4]),
        }],
        end_turn: None,
        phase: None,
    };
    let older_tool_output = ResponseItem::FunctionCallOutput {
        call_id: "call-1".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: image_data_url(&[5, 6, 7, 8]),
                detail: None,
            },
        ]),
    };
    let newest_tool_output = ResponseItem::CustomToolCallOutput {
        call_id: "call-2".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: image_data_url(&[9, 10, 11, 12]),
                detail: None,
            },
        ]),
    };
    // Only the last tool output is expected to lose its image.
    let newest_tool_output_omitted = ResponseItem::CustomToolCallOutput {
        call_id: "call-2".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputText {
                text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
            },
        ]),
    };

    let mut items = vec![
        user_message.clone(),
        older_tool_output.clone(),
        newest_tool_output,
    ];

    enforce_inline_image_request_budget(&mut items, 8).expect("request should fit");

    assert_eq!(
        items,
        vec![user_message, older_tool_output, newest_tool_output_omitted]
    );
}
630+
#[test]
fn errors_when_user_images_still_exceed_request_budget() {
    // A lone 4-byte user image is never rewritten, so a 3-byte budget must fail.
    let user_image = ContentItem::InputImage {
        image_url: image_data_url(&[1, 2, 3, 4]),
    };
    let mut items = vec![ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![user_image],
        end_turn: None,
        phase: None,
    }];

    let message = enforce_inline_image_request_budget(&mut items, 3)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        message,
        "Codex could not send this turn because inline images total 4 bytes, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
    );
}
650+
#[test]
fn errors_after_omitting_tool_images_if_user_images_still_exceed_budget() {
    // The tool image gets dropped, but the 4-byte user image alone still
    // exceeds the 3-byte budget.
    let user_message = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputImage {
            image_url: image_data_url(&[1, 2, 3, 4]),
        }],
        end_turn: None,
        phase: None,
    };
    let tool_output_with_image = ResponseItem::FunctionCallOutput {
        call_id: "call-1".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: image_data_url(&[5, 6, 7, 8]),
                detail: None,
            },
        ]),
    };
    let tool_output_with_placeholder = ResponseItem::FunctionCallOutput {
        call_id: "call-1".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputText {
                text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
            },
        ]),
    };

    let mut items = vec![user_message.clone(), tool_output_with_image];

    let message = enforce_inline_image_request_budget(&mut items, 3)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        message,
        "Codex could not send this turn because inline images still total 4 bytes after omitting all model-generated tool images, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
    );
    // The placeholder stays in place even though the call failed.
    assert_eq!(items, vec![user_message, tool_output_with_placeholder]);
}
703+
704+ fn image_data_url ( bytes : & [ u8 ] ) -> String {
705+ format ! ( "data:image/png;base64,{}" , BASE64_STANDARD . encode( bytes) )
706+ }
399707}
0 commit comments