Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,10 @@ router_settings:
| DEFAULT_MCP_SEMANTIC_FILTER_SIMILARITY_THRESHOLD | Default similarity threshold for MCP semantic tool filtering. Default is 0.3
| DEFAULT_MCP_SEMANTIC_FILTER_TOP_K | Default number of top results to return for MCP semantic tool filtering. Default is 10
| MCP_NPM_CACHE_DIR | Directory for npm cache used by STDIO MCP servers. In containers the default (~/.npm) may not exist or be read-only. Default is `/tmp/.npm_mcp_cache`
| LITELLM_MCP_CLIENT_TIMEOUT | MCP client connection timeout in seconds (stdio and HTTP/SSE transports). Default is 60
| LITELLM_MCP_TOOL_LISTING_TIMEOUT | Timeout in seconds for listing tools from an MCP server. Default is 30
| LITELLM_MCP_METADATA_TIMEOUT | HTTP client timeout in seconds for OAuth metadata fetching. Default is 10
| LITELLM_MCP_HEALTH_CHECK_TIMEOUT | Health check timeout in seconds for MCP servers. Default is 10
| MCP_OAUTH2_TOKEN_CACHE_DEFAULT_TTL | Default TTL in seconds for MCP OAuth2 token cache. Default is 3600
| MCP_OAUTH2_TOKEN_CACHE_MAX_SIZE | Maximum number of entries in MCP OAuth2 token cache. Default is 200
| MCP_OAUTH2_TOKEN_CACHE_MIN_TTL | Minimum TTL in seconds for MCP OAuth2 token cache. Default is 10
Expand Down
6 changes: 6 additions & 0 deletions litellm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@
MCP_NPM_CACHE_DIR = os.getenv("MCP_NPM_CACHE_DIR", "/tmp/.npm_mcp_cache")
MCP_OAUTH2_TOKEN_CACHE_MIN_TTL = int(os.getenv("MCP_OAUTH2_TOKEN_CACHE_MIN_TTL", "10"))

# MCP timeout defaults (seconds). Override via env vars for slow/custom MCP servers.
# Each value is parsed as a float from its LITELLM_-prefixed environment variable,
# falling back to the default given here when the variable is unset.
# Client connection timeout (stdio and HTTP/SSE transports).
MCP_CLIENT_TIMEOUT = float(os.getenv("LITELLM_MCP_CLIENT_TIMEOUT", "60.0"))
# Timeout for listing tools from an MCP server.
MCP_TOOL_LISTING_TIMEOUT = float(os.getenv("LITELLM_MCP_TOOL_LISTING_TIMEOUT", "30.0"))
# HTTP client timeout used when fetching OAuth metadata.
MCP_METADATA_TIMEOUT = float(os.getenv("LITELLM_MCP_METADATA_TIMEOUT", "10.0"))
# Timeout for MCP server health checks.
MCP_HEALTH_CHECK_TIMEOUT = float(os.getenv("LITELLM_MCP_HEALTH_CHECK_TIMEOUT", "10.0"))

LITELLM_UI_ALLOW_HEADERS = [
"x-litellm-semantic-filter",
"x-litellm-semantic-filter-tools",
Expand Down
5 changes: 3 additions & 2 deletions litellm/experimental_mcp_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pydantic import AnyUrl

from litellm._logging import verbose_logger
from litellm.constants import MCP_CLIENT_TIMEOUT
from litellm.llms.custom_httpx.http_handler import get_ssl_configuration
from litellm.types.llms.custom_http import VerifyTypes
from litellm.types.mcp import (
Expand Down Expand Up @@ -63,15 +64,15 @@ def __init__(
transport_type: MCPTransportType = MCPTransport.http,
auth_type: MCPAuthType = None,
auth_value: Optional[Union[str, Dict[str, str]]] = None,
timeout: float = 60.0,
timeout: Optional[float] = None,
stdio_config: Optional[MCPStdioConfig] = None,
extra_headers: Optional[Dict[str, str]] = None,
ssl_verify: Optional[VerifyTypes] = None,
):
self.server_url: str = server_url
self.transport_type: MCPTransport = transport_type
self.auth_type: MCPAuthType = auth_type
self.timeout: float = timeout
self.timeout: float = timeout if timeout is not None else MCP_CLIENT_TIMEOUT
self._mcp_auth_value: Optional[Union[str, Dict[str, str]]] = None
self.stdio_config: Optional[MCPStdioConfig] = stdio_config
self.extra_headers: Optional[Dict[str, str]] = extra_headers
Expand Down
24 changes: 17 additions & 7 deletions litellm/proxy/_experimental/mcp_server/mcp_server_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@

import litellm
from litellm._logging import verbose_logger
from litellm.constants import (
MCP_CLIENT_TIMEOUT,
MCP_HEALTH_CHECK_TIMEOUT,
MCP_METADATA_TIMEOUT,
MCP_TOOL_LISTING_TIMEOUT,
)
from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException
from litellm.experimental_mcp_client.client import MCPClient
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
Expand Down Expand Up @@ -943,7 +949,7 @@ async def _create_mcp_client(
transport_type=transport,
auth_type=server.auth_type,
auth_value=auth_value,
timeout=60.0,
timeout=MCP_CLIENT_TIMEOUT,
stdio_config=stdio_config,
extra_headers=extra_headers,
)
Expand All @@ -955,7 +961,7 @@ async def _create_mcp_client(
transport_type=transport,
auth_type=server.auth_type,
auth_value=auth_value,
timeout=60.0,
timeout=MCP_CLIENT_TIMEOUT,
extra_headers=extra_headers,
)

Expand Down Expand Up @@ -1334,7 +1340,7 @@ async def _fetch_oauth_metadata_from_resource(
try:
client = get_async_httpx_client(
llm_provider=httpxSpecialProvider.MCP,
params={"timeout": 10.0},
params={"timeout": MCP_METADATA_TIMEOUT},
)
response = await client.get(resource_metadata_url)
response.raise_for_status()
Expand Down Expand Up @@ -1430,7 +1436,7 @@ async def _fetch_single_authorization_server_metadata(
try:
client = get_async_httpx_client(
llm_provider=httpxSpecialProvider.MCP,
params={"timeout": 10.0},
params={"timeout": MCP_METADATA_TIMEOUT},
)
response = await client.get(url)
response.raise_for_status()
Expand Down Expand Up @@ -1489,7 +1495,7 @@ async def _fetch_tools_with_timeout(
List of tools from the server
"""
try:
with anyio.fail_after(30.0):
with anyio.fail_after(MCP_TOOL_LISTING_TIMEOUT):
tools = await client.list_tools()
verbose_logger.debug(f"Tools from {server_name}: {tools}")
return tools
Expand Down Expand Up @@ -2508,10 +2514,14 @@ async def _noop(session):
return "ok"

# Add timeout wrapper to prevent hanging
await asyncio.wait_for(client.run_with_session(_noop), timeout=10.0)
await asyncio.wait_for(
client.run_with_session(_noop), timeout=MCP_HEALTH_CHECK_TIMEOUT
)
status = "healthy"
except asyncio.TimeoutError:
health_check_error = "Health check timed out after 10 seconds"
health_check_error = (
f"Health check timed out after {MCP_HEALTH_CHECK_TIMEOUT} seconds"
)
status = "unhealthy"
except asyncio.CancelledError:
health_check_error = "Health check was cancelled"
Expand Down
13 changes: 13 additions & 0 deletions tests/mcp_tests/test_mcp_client_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@
from mcp.types import Tool as MCPTool, CallToolResult as MCPCallToolResult


def test_mcp_client_uses_configurable_default_timeout():
    """Omitting ``timeout`` makes MCPClient fall back to MCP_CLIENT_TIMEOUT."""
    overridden_timeout = 120.0
    constant_path = "litellm.experimental_mcp_client.client.MCP_CLIENT_TIMEOUT"
    # The constant is resolved when the client is constructed, so it has to be
    # patched in the client module's namespace before instantiation.
    with patch(constant_path, overridden_timeout):
        client = MCPClient(
            server_url="http://example.com",
            transport_type=MCPTransport.sse,
        )
        assert client.timeout == overridden_timeout


class TestMCPClientUnitTests:
"""Unit tests for MCPClient functionality."""

Expand Down
4 changes: 4 additions & 0 deletions tests/test_litellm/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ def test_all_numeric_constants_can_be_overridden():
# Constants that use a different env var name than the constant name
constant_to_env_var = {
"MAX_CALLBACKS": "LITELLM_MAX_CALLBACKS",
"MCP_CLIENT_TIMEOUT": "LITELLM_MCP_CLIENT_TIMEOUT",
"MCP_TOOL_LISTING_TIMEOUT": "LITELLM_MCP_TOOL_LISTING_TIMEOUT",
"MCP_METADATA_TIMEOUT": "LITELLM_MCP_METADATA_TIMEOUT",
"MCP_HEALTH_CHECK_TIMEOUT": "LITELLM_MCP_HEALTH_CHECK_TIMEOUT",
}

# Verify all numeric constants have environment variable support
Expand Down
Loading