Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig
from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig
from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig
from .llms.hosted_vllm.responses.transformation import HostedVLLMResponsesAPIConfig as HostedVLLMResponsesAPIConfig
from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig
from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig
from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig
Expand Down
5 changes: 5 additions & 0 deletions litellm/_lazy_imports_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@
"AzureOpenAIOSeriesResponsesAPIConfig",
"XAIResponsesAPIConfig",
"LiteLLMProxyResponsesAPIConfig",
"HostedVLLMResponsesAPIConfig",
"VolcEngineResponsesAPIConfig",
"PerplexityResponsesConfig",
"DatabricksResponsesAPIConfig",
Expand Down Expand Up @@ -897,6 +898,10 @@
".llms.litellm_proxy.responses.transformation",
"LiteLLMProxyResponsesAPIConfig",
),
"HostedVLLMResponsesAPIConfig": (
".llms.hosted_vllm.responses.transformation",
"HostedVLLMResponsesAPIConfig",
),
"VolcEngineResponsesAPIConfig": (
".llms.volcengine.responses.transformation",
"VolcEngineResponsesAPIConfig",
Expand Down
71 changes: 71 additions & 0 deletions litellm/llms/hosted_vllm/responses/transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Responses API transformation for Hosted VLLM provider.
vLLM natively supports the OpenAI-compatible /v1/responses endpoint,
so this config enables direct routing instead of falling back to
the chat completions β†’ responses conversion pipeline.
"""

from typing import Optional

from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import LlmProviders


class HostedVLLMResponsesAPIConfig(OpenAIResponsesAPIConfig):
    """
    Responses API configuration for the Hosted VLLM provider.

    vLLM exposes an OpenAI-compatible ``/v1/responses`` endpoint, so this
    class inherits OpenAI's transformation logic and only customizes:
    the provider identity, base-URL resolution via ``HOSTED_VLLM_API_BASE``,
    and the API-key fallback ``"fake-api-key"`` (vLLM does not require
    authentication by default).
    """

    @property
    def custom_llm_provider(self) -> LlmProviders:
        """Identify this config as belonging to the hosted_vllm provider."""
        return LlmProviders.HOSTED_VLLM

    def validate_environment(
        self,
        headers: dict,
        model: str,
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> dict:
        """Attach a Bearer token header; vllm does not require an api key,
        so fall back to a placeholder when none is configured."""
        params = litellm_params or GenericLiteLLMParams()
        key = (
            params.api_key
            or get_secret_str("HOSTED_VLLM_API_KEY")
            or "fake-api-key"
        )
        headers["Authorization"] = f"Bearer {key}"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        litellm_params: dict,
    ) -> str:
        """
        Build the full ``/v1/responses`` URL.

        Uses ``api_base`` when given, otherwise the ``HOSTED_VLLM_API_BASE``
        environment variable / secret.

        Raises:
            ValueError: if neither source provides a base URL.
        """
        base = api_base or get_secret_str("HOSTED_VLLM_API_BASE")

        if base is None:
            raise ValueError(
                "api_base not set for Hosted VLLM responses API. "
                "Set via api_base parameter or HOSTED_VLLM_API_BASE environment variable"
            )

        # Normalize trailing slashes, then append the responses path —
        # avoiding a doubled "/v1" when the base already includes it.
        base = base.rstrip("/")
        suffix = "/responses" if base.endswith("/v1") else "/v1/responses"
        return f"{base}{suffix}"
2 changes: 2 additions & 0 deletions litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8310,6 +8310,8 @@ def get_provider_responses_api_config(
if model and "gpt" in model.lower():
return litellm.DatabricksResponsesAPIConfig()
return None
elif litellm.LlmProviders.HOSTED_VLLM == provider:
return litellm.HostedVLLMResponsesAPIConfig()
return None

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,48 @@
import sys
from unittest.mock import MagicMock, patch

import pytest

sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path

import litellm
from litellm.llms.hosted_vllm.responses.transformation import (
HostedVLLMResponsesAPIConfig,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager


def _make_mock_chat_completion_response(content: str = "Hello! I'm doing well.") -> dict:
def _make_mock_responses_api_response(content: str = "Hello! I'm doing well.") -> dict:
return {
"id": "chatcmpl-test123",
"object": "chat.completion",
"created": 1234567890,
"id": "resp-test123",
"object": "response",
"created_at": 1234567890,
"model": "Qwen/Qwen3-8B",
"choices": [
"output": [
{
"index": 0,
"message": {"role": "assistant", "content": content},
"finish_reason": "stop",
"type": "message",
"id": "msg-test123",
"status": "completed",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": content,
"annotations": [],
}
],
}
],
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
"status": "completed",
"usage": {
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30,
},
}


Expand All @@ -49,18 +70,11 @@ def _make_mock_http_client(response_body: dict) -> MagicMock:

def test_hosted_vllm_responses_create_with_string_input():
"""
Regression test: responses.create() with string input must not raise
TypeError: 'NoneType' object is not a mapping.
Root cause: extra_body=None was passed explicitly through the
responses→completion pipeline. In add_provider_specific_params_to_optional_params(),
passed_params.pop("extra_body", {}) returned None (key existed with value None),
and **None raised TypeError at dict unpacking.
Fix: normalize None to {} for both extra_body and optional_params["extra_body"].
Test that hosted_vllm routes directly to the native /v1/responses endpoint
when the Responses API config is registered, and correctly parses the response.
"""
mock_client = _make_mock_http_client(
_make_mock_chat_completion_response("I'm doing well, thanks!")
_make_mock_responses_api_response("I'm doing well, thanks!")
)

with patch(
Expand Down Expand Up @@ -101,3 +115,78 @@ def test_hosted_vllm_responses_create_with_explicit_none_extra_body():

# extra_body=None should be normalized to an empty dict (or absent)
assert optional_params.get("extra_body") is not None or "extra_body" not in optional_params


def test_hosted_vllm_provider_config_registration():
    """ProviderConfigManager should resolve hosted_vllm to HostedVLLMResponsesAPIConfig."""
    resolved = ProviderConfigManager.get_provider_responses_api_config(
        model="hosted_vllm/Qwen/Qwen3-8B",
        provider=LlmProviders.HOSTED_VLLM,
    )

    assert resolved is not None
    assert isinstance(resolved, HostedVLLMResponsesAPIConfig)
    assert resolved.custom_llm_provider == LlmProviders.HOSTED_VLLM


def test_hosted_vllm_responses_api_url():
    """get_complete_url() should always resolve to <base>/v1/responses."""
    config = HostedVLLMResponsesAPIConfig()

    # All three api_base spellings must normalize to the same endpoint.
    bases = [
        "http://localhost:8000",      # bare base, no version segment
        "http://localhost:8000/v1",   # base already ends with /v1
        "http://localhost:8000/v1/",  # trailing-slash variant
    ]
    for base in bases:
        url = config.get_complete_url(api_base=base, litellm_params={})
        assert url == "http://localhost:8000/v1/responses"


def test_hosted_vllm_responses_api_url_requires_api_base():
    """Test get_complete_url() raises ValueError when api_base is not set.

    get_secret_str is patched to return None so the test stays deterministic
    even when HOSTED_VLLM_API_BASE is defined in the runner's environment
    (e.g. in CI), where the env-var fallback would otherwise succeed.
    """
    config = HostedVLLMResponsesAPIConfig()

    with patch(
        "litellm.llms.hosted_vllm.responses.transformation.get_secret_str",
        return_value=None,
    ):
        with pytest.raises(ValueError, match="api_base not set"):
            config.get_complete_url(
                api_base=None,
                litellm_params={},
            )
Comment on lines +158 to +166
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test is fragile when env var is set

test_hosted_vllm_responses_api_url_requires_api_base will not raise ValueError if the HOSTED_VLLM_API_BASE environment variable is set in the test runner's environment (e.g., CI), because get_secret_str("HOSTED_VLLM_API_BASE") will return a value before the None check. Consider patching get_secret_str to return None, or using monkeypatch.delenv to ensure the env var is unset:

Suggested change
def test_hosted_vllm_responses_api_url_requires_api_base():
"""Test get_complete_url() raises ValueError when api_base is not set."""
config = HostedVLLMResponsesAPIConfig()
with pytest.raises(ValueError, match="api_base not set"):
config.get_complete_url(
api_base=None,
litellm_params={},
)
def test_hosted_vllm_responses_api_url_requires_api_base(monkeypatch):
"""Test get_complete_url() raises ValueError when api_base is not set."""
monkeypatch.delenv("HOSTED_VLLM_API_BASE", raising=False)
config = HostedVLLMResponsesAPIConfig()
with pytest.raises(ValueError, match="api_base not set"):
config.get_complete_url(
api_base=None,
litellm_params={},
)



def test_hosted_vllm_validate_environment_default_api_key():
    """Test validate_environment() defaults to 'fake-api-key' when no key is provided.

    get_secret_str is patched to return None so the test passes even when
    HOSTED_VLLM_API_KEY is set in the runner's environment (e.g. in CI),
    where the env-var fallback would otherwise supply a real key.
    """
    config = HostedVLLMResponsesAPIConfig()

    with patch(
        "litellm.llms.hosted_vllm.responses.transformation.get_secret_str",
        return_value=None,
    ):
        headers = config.validate_environment(
            headers={},
            model="Qwen/Qwen3-8B",
            litellm_params=GenericLiteLLMParams(),
        )

    assert headers.get("Authorization") == "Bearer fake-api-key"
Comment on lines +169 to +179
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test is fragile when env var is set

test_hosted_vllm_validate_environment_default_api_key will fail if HOSTED_VLLM_API_KEY is set in the test environment, because get_secret_str("HOSTED_VLLM_API_KEY") will return a real value instead of falling through to "fake-api-key". Consider clearing the env var:

Suggested change
def test_hosted_vllm_validate_environment_default_api_key():
"""Test validate_environment() defaults to 'fake-api-key' when no key is provided."""
config = HostedVLLMResponsesAPIConfig()
headers = config.validate_environment(
headers={},
model="Qwen/Qwen3-8B",
litellm_params=GenericLiteLLMParams(),
)
assert headers.get("Authorization") == "Bearer fake-api-key"
def test_hosted_vllm_validate_environment_default_api_key(monkeypatch):
"""Test validate_environment() defaults to 'fake-api-key' when no key is provided."""
monkeypatch.delenv("HOSTED_VLLM_API_KEY", raising=False)
config = HostedVLLMResponsesAPIConfig()
headers = config.validate_environment(
headers={},
model="Qwen/Qwen3-8B",
litellm_params=GenericLiteLLMParams(),
)
assert headers.get("Authorization") == "Bearer fake-api-key"



def test_hosted_vllm_validate_environment_custom_api_key():
    """An explicitly supplied api_key must win over any environment fallback."""
    cfg = HostedVLLMResponsesAPIConfig()

    result = cfg.validate_environment(
        headers={},
        model="Qwen/Qwen3-8B",
        litellm_params=GenericLiteLLMParams(api_key="my-custom-key"),
    )

    assert result.get("Authorization") == "Bearer my-custom-key"
Loading