Skip to content

Commit 1b4cfc2

Browse files
anencore94 and claude authored
feat: add native Responses API support for hosted_vllm provider (#22298)
Register HostedVLLMResponsesAPIConfig so that litellm.responses(model="hosted_vllm/...") routes directly to vLLM's /v1/responses endpoint instead of falling back to the chat completions → responses conversion pipeline. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e3e9ac5 commit 1b4cfc2

File tree

5 files changed

+187
-19
lines changed

5 files changed

+187
-19
lines changed

litellm/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,6 +1516,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
15161516
from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig
15171517
from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig
15181518
from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig
1519+
from .llms.hosted_vllm.responses.transformation import HostedVLLMResponsesAPIConfig as HostedVLLMResponsesAPIConfig
15191520
from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig
15201521
from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig
15211522
from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig

litellm/_lazy_imports_registry.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@
226226
"AzureOpenAIOSeriesResponsesAPIConfig",
227227
"XAIResponsesAPIConfig",
228228
"LiteLLMProxyResponsesAPIConfig",
229+
"HostedVLLMResponsesAPIConfig",
229230
"VolcEngineResponsesAPIConfig",
230231
"PerplexityResponsesConfig",
231232
"DatabricksResponsesAPIConfig",
@@ -897,6 +898,10 @@
897898
".llms.litellm_proxy.responses.transformation",
898899
"LiteLLMProxyResponsesAPIConfig",
899900
),
901+
"HostedVLLMResponsesAPIConfig": (
902+
".llms.hosted_vllm.responses.transformation",
903+
"HostedVLLMResponsesAPIConfig",
904+
),
900905
"VolcEngineResponsesAPIConfig": (
901906
".llms.volcengine.responses.transformation",
902907
"VolcEngineResponsesAPIConfig",
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
Responses API transformation for Hosted VLLM provider.
3+
4+
vLLM natively supports the OpenAI-compatible /v1/responses endpoint,
5+
so this config enables direct routing instead of falling back to
6+
the chat completions → responses conversion pipeline.
7+
"""
8+
9+
from typing import Optional
10+
11+
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
12+
from litellm.secret_managers.main import get_secret_str
13+
from litellm.types.router import GenericLiteLLMParams
14+
from litellm.types.utils import LlmProviders
15+
16+
17+
class HostedVLLMResponsesAPIConfig(OpenAIResponsesAPIConfig):
    """Responses API configuration for the hosted_vllm provider.

    Inherits the OpenAI transformation because vLLM exposes an
    OpenAI-compatible ``/v1/responses`` endpoint. Only three things
    differ from the parent: the provider identifier, base-URL
    resolution via HOSTED_VLLM_API_BASE, and the "fake-api-key"
    placeholder used when no key is configured (vLLM does not
    require authentication by default).
    """

    @property
    def custom_llm_provider(self) -> LlmProviders:
        # Identifies this config as belonging to the hosted_vllm provider.
        return LlmProviders.HOSTED_VLLM

    def validate_environment(
        self,
        headers: dict,
        model: str,
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> dict:
        """Attach the Authorization header, defaulting the key for keyless vLLM.

        Mutates and returns *headers*. Resolution order for the key:
        explicit param -> HOSTED_VLLM_API_KEY secret -> placeholder.
        """
        params = litellm_params or GenericLiteLLMParams()
        # vllm does not require an api key, so fall through to a placeholder.
        resolved_key = (
            params.api_key
            or get_secret_str("HOSTED_VLLM_API_KEY")
            or "fake-api-key"
        )
        headers["Authorization"] = f"Bearer {resolved_key}"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        litellm_params: dict,
    ) -> str:
        """Build the full responses-endpoint URL.

        Uses *api_base* or the HOSTED_VLLM_API_BASE secret; raises
        ValueError when neither is set.
        """
        base = api_base or get_secret_str("HOSTED_VLLM_API_BASE")
        if base is None:
            raise ValueError(
                "api_base not set for Hosted VLLM responses API. "
                "Set via api_base parameter or HOSTED_VLLM_API_BASE environment variable"
            )

        # Normalize trailing slashes, then avoid doubling the /v1 segment
        # when the configured base already ends with it.
        base = base.rstrip("/")
        suffix = "/responses" if base.endswith("/v1") else "/v1/responses"
        return f"{base}{suffix}"

litellm/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8310,6 +8310,8 @@ def get_provider_responses_api_config(
83108310
if model and "gpt" in model.lower():
83118311
return litellm.DatabricksResponsesAPIConfig()
83128312
return None
8313+
elif litellm.LlmProviders.HOSTED_VLLM == provider:
8314+
return litellm.HostedVLLMResponsesAPIConfig()
83138315
return None
83148316

83158317
@staticmethod

tests/test_litellm/llms/hosted_vllm/responses/test_hosted_vllm_responses.py

Lines changed: 108 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,27 +12,48 @@
1212
import sys
1313
from unittest.mock import MagicMock, patch
1414

15+
import pytest
16+
1517
sys.path.insert(
1618
0, os.path.abspath("../../../../..")
1719
) # Adds the parent directory to the system path
1820

1921
import litellm
22+
from litellm.llms.hosted_vllm.responses.transformation import (
23+
HostedVLLMResponsesAPIConfig,
24+
)
25+
from litellm.types.router import GenericLiteLLMParams
26+
from litellm.types.utils import LlmProviders
27+
from litellm.utils import ProviderConfigManager
2028

2129

22-
def _make_mock_chat_completion_response(content: str = "Hello! I'm doing well.") -> dict:
30+
def _make_mock_responses_api_response(content: str = "Hello! I'm doing well.") -> dict:
2331
return {
24-
"id": "chatcmpl-test123",
25-
"object": "chat.completion",
26-
"created": 1234567890,
32+
"id": "resp-test123",
33+
"object": "response",
34+
"created_at": 1234567890,
2735
"model": "Qwen/Qwen3-8B",
28-
"choices": [
36+
"output": [
2937
{
30-
"index": 0,
31-
"message": {"role": "assistant", "content": content},
32-
"finish_reason": "stop",
38+
"type": "message",
39+
"id": "msg-test123",
40+
"status": "completed",
41+
"role": "assistant",
42+
"content": [
43+
{
44+
"type": "output_text",
45+
"text": content,
46+
"annotations": [],
47+
}
48+
],
3349
}
3450
],
35-
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
51+
"status": "completed",
52+
"usage": {
53+
"input_tokens": 10,
54+
"output_tokens": 20,
55+
"total_tokens": 30,
56+
},
3657
}
3758

3859

@@ -49,18 +70,11 @@ def _make_mock_http_client(response_body: dict) -> MagicMock:
4970

5071
def test_hosted_vllm_responses_create_with_string_input():
5172
"""
52-
Regression test: responses.create() with string input must not raise
53-
TypeError: 'NoneType' object is not a mapping.
54-
55-
Root cause: extra_body=None was passed explicitly through the
56-
responses→completion pipeline. In add_provider_specific_params_to_optional_params(),
57-
passed_params.pop("extra_body", {}) returned None (key existed with value None),
58-
and **None raised TypeError at dict unpacking.
59-
60-
Fix: normalize None to {} for both extra_body and optional_params["extra_body"].
73+
Test that hosted_vllm routes directly to the native /v1/responses endpoint
74+
when the Responses API config is registered, and correctly parses the response.
6175
"""
6276
mock_client = _make_mock_http_client(
63-
_make_mock_chat_completion_response("I'm doing well, thanks!")
77+
_make_mock_responses_api_response("I'm doing well, thanks!")
6478
)
6579

6680
with patch(
@@ -101,3 +115,78 @@ def test_hosted_vllm_responses_create_with_explicit_none_extra_body():
101115

102116
# extra_body=None should be normalized to an empty dict (or absent)
103117
assert optional_params.get("extra_body") is not None or "extra_body" not in optional_params
118+
119+
120+
def test_hosted_vllm_provider_config_registration():
    """ProviderConfigManager must hand back the hosted_vllm Responses API config."""
    resolved = ProviderConfigManager.get_provider_responses_api_config(
        model="hosted_vllm/Qwen/Qwen3-8B",
        provider=LlmProviders.HOSTED_VLLM,
    )

    # A registered config means responses() routes natively instead of
    # falling back to the chat-completions conversion pipeline.
    assert resolved is not None
    assert isinstance(resolved, HostedVLLMResponsesAPIConfig)
    assert resolved.custom_llm_provider == LlmProviders.HOSTED_VLLM
130+
131+
132+
def test_hosted_vllm_responses_api_url():
    """get_complete_url() should yield <base>/v1/responses for every base shape."""
    config = HostedVLLMResponsesAPIConfig()
    expected = "http://localhost:8000/v1/responses"

    # Bare base, base already ending in /v1, and base with a trailing slash
    # must all normalize to the same endpoint URL.
    for base in (
        "http://localhost:8000",
        "http://localhost:8000/v1",
        "http://localhost:8000/v1/",
    ):
        assert config.get_complete_url(api_base=base, litellm_params={}) == expected
156+
157+
158+
def test_hosted_vllm_responses_api_url_requires_api_base():
    """A missing api_base must surface as a descriptive ValueError."""
    config = HostedVLLMResponsesAPIConfig()

    with pytest.raises(ValueError, match="api_base not set"):
        config.get_complete_url(api_base=None, litellm_params={})
167+
168+
169+
def test_hosted_vllm_validate_environment_default_api_key():
    """With no key configured, the Authorization header falls back to 'fake-api-key'."""
    config = HostedVLLMResponsesAPIConfig()

    result = config.validate_environment(
        headers={},
        model="Qwen/Qwen3-8B",
        litellm_params=GenericLiteLLMParams(),
    )

    assert result.get("Authorization") == "Bearer fake-api-key"
180+
181+
182+
def test_hosted_vllm_validate_environment_custom_api_key():
    """An explicitly supplied api_key must win over the placeholder."""
    config = HostedVLLMResponsesAPIConfig()

    result = config.validate_environment(
        headers={},
        model="Qwen/Qwen3-8B",
        litellm_params=GenericLiteLLMParams(api_key="my-custom-key"),
    )

    assert result.get("Authorization") == "Bearer my-custom-key"

0 commit comments

Comments
 (0)