|
3 | 3 | import pytest |
4 | 4 | from pydantic import BaseModel |
5 | 5 |
|
6 | | -from agents import Agent, ModelBehaviorError, Runner, UserError |
| 6 | +from agents import ( |
| 7 | + Agent, |
| 8 | + ModelBehaviorError, |
| 9 | + RunContextWrapper, |
| 10 | + Runner, |
| 11 | + UserError, |
| 12 | + default_tool_error_function, |
| 13 | +) |
| 14 | +from agents.exceptions import AgentsException |
7 | 15 |
|
8 | 16 | from ..fake_model import FakeModel |
9 | 17 | from ..test_responses import get_function_tool_call, get_text_message |
@@ -195,3 +203,60 @@ async def test_runner_calls_mcp_tool_with_args(streaming: bool): |
195 | 203 | assert server.tool_results == [f"result_test_tool_2_{json_args}"] |
196 | 204 |
|
197 | 205 | await server.cleanup() |
| 206 | + |
| 207 | + |
class CrashingFakeMCPServer(FakeMCPServer):
    """Fake MCP server whose ``call_tool`` always raises.

    Used to exercise the Runner's error path when an MCP tool invocation
    fails unexpectedly.
    """

    async def call_tool(
        self,
        tool_name: str,
        arguments: dict[str, object] | None,
        meta: dict[str, object] | None = None,
    ):
        # Simulate an unexpected server-side failure for any tool call.
        raise Exception("Crash!")
| 217 | + |
@pytest.mark.asyncio
@pytest.mark.parametrize("streaming", [False, True])
async def test_runner_emits_mcp_error_tool_call_output_item(streaming: bool):
    """Runner should emit tool_call_output_item with failure output when MCP tool raises."""
    # A server whose call_tool always raises, wired to a single crashing tool.
    server = CrashingFakeMCPServer()
    server.add_tool("crashing_tool", {})

    fake_model = FakeModel()
    agent = Agent(
        name="test",
        model=fake_model,
        mcp_servers=[server],
    )

    # Turn 1: the model calls the crashing tool. Turn 2: it finishes with "done".
    fake_model.add_multiple_turn_outputs(
        [
            [get_text_message("a_message"), get_function_tool_call("crashing_tool", "{}")],
            [get_text_message("done")],
        ]
    )

    if streaming:
        result = Runner.run_streamed(agent, input="user_message")
        # Drain the stream so the run completes before inspecting new_items.
        async for _ in result.stream_events():
            pass
    else:
        result = await Runner.run(agent, input="user_message")

    # The tool failure must not abort the run; the model still finishes.
    assert result.final_output == "done"

    tool_output_items = [
        item for item in result.new_items if item.type == "tool_call_output_item"
    ]
    assert tool_output_items, "Expected tool_call_output_item for MCP failure"

    # The emitted output must match what the default error function produces
    # for the wrapped MCP invocation error.
    expected_error_message = default_tool_error_function(
        RunContextWrapper(context=None),
        AgentsException(
            "Error invoking MCP tool crashing_tool on server 'fake_mcp_server': Crash!"
        ),
    )
    assert tool_output_items[0].output == expected_error_message
0 commit comments