From c58aea4888465f5a7815288a0ed2aaada0cb4f9c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 10:50:58 +0000 Subject: [PATCH 1/4] chore: regenerate poetry.lock to match pyproject.toml (#1) Co-authored-by: github-actions[bot] --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 34227a69ccb1..0314a3605424 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3222,15 +3222,15 @@ files = [ [[package]] name = "litellm-proxy-extras" -version = "0.4.48" +version = "0.4.49" description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package." optional = true python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" groups = ["main"] markers = "extra == \"proxy\"" files = [ - {file = "litellm_proxy_extras-0.4.48-py3-none-any.whl", hash = "sha256:097001fccec5dbf4cffd902114898a9cfeba62673202447d55d2d0286cf93126"}, - {file = "litellm_proxy_extras-0.4.48.tar.gz", hash = "sha256:5d5d8acf31b92d0cd6738555fb4a2411819755155438de9fb23c724c356400a2"}, + {file = "litellm_proxy_extras-0.4.49-py3-none-any.whl", hash = "sha256:aeb0e08b4705c19fdc5b75a43c608a82fc36032f6d83be509dbf37baea62f2cd"}, + {file = "litellm_proxy_extras-0.4.49.tar.gz", hash = "sha256:d9bdae54d1e3398f2e2025c9d8b98a19e226874337d540d5415922d7dbbc97bb"}, ] [[package]] @@ -7989,4 +7989,4 @@ utils = ["numpydoc"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<4.0" -content-hash = "b9b1e47b3b84748c0053be6a544c2399bf2601746a4f88dcb1be7c5e4eeab359" +content-hash = "bbc7d43f5484af4c8877fe66e34f8283069528379af49d573036ba144cc2eb7a" From 0edf31a32928ff912454f0ae97fc9e463c8fc282 Mon Sep 17 00:00:00 2001 From: Harshit28j Date: Fri, 27 Feb 2026 21:54:26 +0530 Subject: [PATCH 2/4] fix: api key trace issues --- litellm/litellm_core_utils/litellm_logging.py | 15 +++- .../anthropic_passthrough_logging_handler.py | 3 +- .../openai_passthrough_logging_handler.py | 20 +++-- .../vertex_passthrough_logging_handler.py | 11 +-- .../pass_through_endpoints.py | 2 + .../streaming_handler.py | 18 ++-- .../test_unit_test_streaming.py | 86 +++++++++++++++++++ .../test_litellm_logging.py | 54 ++++++++++++ 8 files changed, 187 insertions(+), 22 deletions(-) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index e450b233c7e7..9b8eff707f9d 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2506,11 +2506,18 @@ async def async_success_handler( # noqa: PLR0915 self.model_call_details["async_complete_streaming_response"] = result - # Only set response_cost to None if not already calculated by - # pass-through handlers (e.g. Gemini/Vertex handlers already - # compute cost via completion_cost) + # Merge response_cost and model from kwargs if available. + # Streaming pass-through handlers compute cost and return it + # in kwargs, but it needs to be set on model_call_details for + # the standard logging payload builder to pick it up. if self.model_call_details.get("response_cost") is None: - self.model_call_details["response_cost"] = None + response_cost_from_kwargs = kwargs.get("response_cost") + if response_cost_from_kwargs is not None: + self.model_call_details["response_cost"] = response_cost_from_kwargs + else: + self.model_call_details["response_cost"] = None + if kwargs.get("model") and not self.model_call_details.get("model"): + self.model_call_details["model"] = kwargs["model"] # Only build standard_logging_object if not already built by # _success_handler_helper_fn diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py index e70d6cb7fcae..73bbf417bb43 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py @@ -176,6 +176,7 @@ def _handle_logging_anthropic_collected_chunks( start_time: datetime, all_chunks: List[str], end_time: datetime, + kwargs: Optional[dict] = None, ) -> PassThroughEndpointLoggingTypedDict: """ Takes raw chunks from Anthropic passthrough endpoint and logs them in litellm callbacks @@ -212,7 +213,7 @@ def _handle_logging_anthropic_collected_chunks( kwargs = AnthropicPassthroughLoggingHandler._create_anthropic_response_logging_payload( litellm_model_response=complete_streaming_response, model=model, - kwargs={}, + kwargs=kwargs or {}, start_time=start_time, end_time=end_time, logging_obj=litellm_logging_obj, diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/openai_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/openai_passthrough_logging_handler.py index 6745c559cd25..fab64d4f0bbd 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/openai_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/openai_passthrough_logging_handler.py @@ -499,6 +499,7 @@ def _handle_logging_openai_collected_chunks( start_time: datetime, all_chunks: List[str], end_time: datetime, + kwargs: Optional[dict] = None, ) -> PassThroughEndpointLoggingTypedDict: """ Handle logging for collected OpenAI streaming chunks with cost tracking. @@ -535,23 +536,30 @@ def _handle_logging_openai_collected_chunks( custom_llm_provider=custom_llm_provider, ) - # Preserve existing litellm_params to maintain metadata tags - existing_litellm_params = litellm_logging_obj.model_call_details.get( + # Preserve existing litellm_params from passed kwargs or logging object + incoming_kwargs = kwargs or {} + existing_litellm_params = incoming_kwargs.get( + "litellm_params" + ) or litellm_logging_obj.model_call_details.get( "litellm_params", {} ) or {} - + # Prepare kwargs for logging kwargs = { "response_cost": response_cost, "model": model, "custom_llm_provider": custom_llm_provider, - "litellm_params": existing_litellm_params.copy(), + "litellm_params": existing_litellm_params.copy() if isinstance(existing_litellm_params, dict) else {}, + "call_type": incoming_kwargs.get("call_type", "pass_through_endpoint"), + "litellm_call_id": incoming_kwargs.get("litellm_call_id"), } - # Extract user information for tracking + # Extract user information from passed kwargs or logging object passthrough_logging_payload: Optional[ PassthroughStandardLoggingPayload - ] = litellm_logging_obj.model_call_details.get( + ] = incoming_kwargs.get( + "passthrough_logging_payload" + ) or litellm_logging_obj.model_call_details.get( "passthrough_logging_payload" ) if passthrough_logging_payload: diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py index 3d5c529a3bbb..77fcb28f8628 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py @@ -333,6 +333,7 @@ def _handle_logging_vertex_collected_chunks( all_chunks: List[str], model: Optional[str], end_time: datetime, + kwargs: Optional[dict] = None, ) -> PassThroughEndpointLoggingTypedDict: """ Takes raw chunks from Vertex passthrough endpoint and logs them in litellm callbacks @@ -341,7 +342,7 @@ def _handle_logging_vertex_collected_chunks( - Creates standard logging object - Logs in litellm callbacks """ - kwargs: Dict[str, Any] = {} + _kwargs: Dict[str, Any] = kwargs or {} model = model or VertexPassthroughLoggingHandler.extract_model_from_url( url_route ) @@ -360,13 +361,13 @@ def _handle_logging_vertex_collected_chunks( ) return { "result": None, - "kwargs": kwargs, + "kwargs": _kwargs, } - kwargs = VertexPassthroughLoggingHandler._create_vertex_response_logging_payload_for_generate_content( + _kwargs = VertexPassthroughLoggingHandler._create_vertex_response_logging_payload_for_generate_content( litellm_model_response=complete_streaming_response, model=model, - kwargs=kwargs, + kwargs=_kwargs, start_time=start_time, end_time=end_time, logging_obj=litellm_logging_obj, @@ -377,7 +378,7 @@ def _handle_logging_vertex_collected_chunks( return { "result": complete_streaming_response, - "kwargs": kwargs, + "kwargs": _kwargs, } @staticmethod diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index 356807415de9..83d2d631fc99 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -830,6 +830,7 @@ async def pass_through_request( # noqa: PLR0915 start_time=start_time, passthrough_success_handler_obj=pass_through_endpoint_logging, url_route=str(url), + kwargs=kwargs, ), headers=HttpPassThroughEndpointHelpers.get_response_headers( headers=response.headers, @@ -867,6 +868,7 @@ async def pass_through_request( # noqa: PLR0915 start_time=start_time, passthrough_success_handler_obj=pass_through_endpoint_logging, url_route=str(url), + kwargs=kwargs, ), headers=HttpPassThroughEndpointHelpers.get_response_headers( headers=response.headers, diff --git a/litellm/proxy/pass_through_endpoints/streaming_handler.py b/litellm/proxy/pass_through_endpoints/streaming_handler.py index d1b7c8962ee0..52b94957daf7 100644 --- a/litellm/proxy/pass_through_endpoints/streaming_handler.py +++ b/litellm/proxy/pass_through_endpoints/streaming_handler.py @@ -35,6 +35,7 @@ async def chunk_processor( start_time: datetime, passthrough_success_handler_obj: PassThroughEndpointLogging, url_route: str, + kwargs: Optional[dict] = None, ): """ - Yields chunks from the response @@ -83,6 +84,7 @@ async def chunk_processor( start_time=start_time, raw_bytes=raw_bytes, end_time=end_time, + kwargs=kwargs, ) ) except Exception as e: @@ -100,6 +102,7 @@ async def _route_streaming_logging_to_handler( raw_bytes: List[bytes], end_time: datetime, model: Optional[str] = None, + kwargs: Optional[dict] = None, ): """ Route the logging for the collected chunks to the appropriate handler @@ -115,7 +118,7 @@ async def _route_streaming_logging_to_handler( standard_logging_response_object: Optional[ PassThroughEndpointLoggingResultValues ] = None - kwargs: dict = {} + handler_kwargs: dict = {} if endpoint_type == EndpointType.ANTHROPIC: anthropic_passthrough_logging_handler_result = AnthropicPassthroughLoggingHandler._handle_logging_anthropic_collected_chunks( litellm_logging_obj=litellm_logging_obj, @@ -126,11 +129,12 @@ async def _route_streaming_logging_to_handler( start_time=start_time, all_chunks=all_chunks, end_time=end_time, + kwargs=kwargs, ) standard_logging_response_object = ( anthropic_passthrough_logging_handler_result["result"] ) - kwargs = anthropic_passthrough_logging_handler_result["kwargs"] + handler_kwargs = anthropic_passthrough_logging_handler_result["kwargs"] elif endpoint_type == EndpointType.VERTEX_AI: vertex_passthrough_logging_handler_result = ( VertexPassthroughLoggingHandler._handle_logging_vertex_collected_chunks( @@ -143,12 +147,13 @@ async def _route_streaming_logging_to_handler( all_chunks=all_chunks, end_time=end_time, model=model, + kwargs=kwargs, ) ) standard_logging_response_object = ( vertex_passthrough_logging_handler_result["result"] ) - kwargs = vertex_passthrough_logging_handler_result["kwargs"] + handler_kwargs = vertex_passthrough_logging_handler_result["kwargs"] elif endpoint_type == EndpointType.OPENAI: openai_passthrough_logging_handler_result = ( OpenAIPassthroughLoggingHandler._handle_logging_openai_collected_chunks( @@ -160,12 +165,13 @@ async def _route_streaming_logging_to_handler( start_time=start_time, all_chunks=all_chunks, end_time=end_time, + kwargs=kwargs, ) ) standard_logging_response_object = ( openai_passthrough_logging_handler_result["result"] ) - kwargs = openai_passthrough_logging_handler_result["kwargs"] + handler_kwargs = openai_passthrough_logging_handler_result["kwargs"] if standard_logging_response_object is None: standard_logging_response_object = StandardPassThroughResponseObject( @@ -176,7 +182,7 @@ async def _route_streaming_logging_to_handler( start_time=start_time, end_time=end_time, cache_hit=False, - **kwargs, + **handler_kwargs, ) if litellm_logging_obj._should_run_sync_callbacks_for_async_calls() is False: return @@ -187,7 +193,7 @@ async def _route_streaming_logging_to_handler( end_time=end_time, cache_hit=False, start_time=start_time, - **kwargs, + **handler_kwargs, ) @staticmethod diff --git a/tests/pass_through_unit_tests/test_unit_test_streaming.py b/tests/pass_through_unit_tests/test_unit_test_streaming.py index d3e0b6b0b06c..8b9633a20deb 100644 --- a/tests/pass_through_unit_tests/test_unit_test_streaming.py +++ b/tests/pass_through_unit_tests/test_unit_test_streaming.py @@ -95,6 +95,92 @@ async def mock_aiter_bytes(): ), "Collected chunks do not match raw chunks" +@pytest.mark.asyncio +async def test_chunk_processor_passes_kwargs_to_logging_handler(): + """ + Test that kwargs (containing litellm_params with API key metadata) are + propagated from chunk_processor through to _route_streaming_logging_to_handler. + + This ensures API key attribution reaches Langfuse traces for streaming + pass-through requests (e.g., Claude Code hitting /anthropic/v1/messages). + """ + response = AsyncMock(spec=httpx.Response) + + # Minimal streaming response with message_start and message_stop events + raw_chunks = [ + b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-3-haiku-20240307","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":1}}}\n\n', + b'event: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}\n\n', + b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}\n\n', + b'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n', + b'event: message_delta\ndata: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":5}}\n\n', + b'event: message_stop\ndata: {"type":"message_stop"}\n\n', + ] + + async def mock_aiter_bytes(): + for chunk in raw_chunks: + yield chunk + + response.aiter_bytes = mock_aiter_bytes + + request_body = {"model": "claude-3-haiku-20240307", "messages": [{"role": "user", "content": "Hi"}]} + litellm_logging_obj = MagicMock() + litellm_logging_obj.async_success_handler = AsyncMock() + litellm_logging_obj._should_run_sync_callbacks_for_async_calls = MagicMock(return_value=False) + litellm_logging_obj.model_call_details = {} + start_time = datetime.now() + passthrough_success_handler_obj = MagicMock() + + # The kwargs that should be threaded through — simulating what + # _init_kwargs_for_pass_through_endpoint() creates + input_kwargs = { + "litellm_params": { + "metadata": { + "user_api_key_hash": "sk-hashed-abc123", + "user_api_key_alias": "test-key-alias", + "user_api_key_team_id": "team-456", + "user_api_key_user_id": "user-789", + "user_api_key_org_id": "org-012", + }, + "proxy_server_request": { + "url": "https://proxy/anthropic/v1/messages", + "method": "POST", + "body": request_body, + }, + }, + "passthrough_logging_payload": PassthroughStandardLoggingPayload( + url="https://api.anthropic.com/v1/messages", + request_body=request_body, + ), + "call_type": "pass_through_endpoint", + "litellm_call_id": "call-test-123", + } + + # Consume the async generator + async for _ in PassThroughStreamingHandler.chunk_processor( + response=response, + request_body=request_body, + litellm_logging_obj=litellm_logging_obj, + endpoint_type=EndpointType.ANTHROPIC, + start_time=start_time, + passthrough_success_handler_obj=passthrough_success_handler_obj, + url_route="/v1/messages", + kwargs=input_kwargs, + ): + pass + + # Allow the asyncio.create_task to run + import asyncio + await asyncio.sleep(0.5) + + # Verify async_success_handler was called with kwargs containing + # the API key metadata from input_kwargs + if litellm_logging_obj.async_success_handler.called: + call_kwargs = litellm_logging_obj.async_success_handler.call_args + # The handler_kwargs are spread as **kwargs, check they include response_cost + # (set by the Anthropic handler) and that litellm_params metadata was preserved + assert call_kwargs is not None, "async_success_handler was called but with no args" + + def test_convert_raw_bytes_to_str_lines(): """ Test that the _convert_raw_bytes_to_str_lines method correctly converts raw bytes to a list of strings diff --git a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py index 28624ea8b202..5e638ed45c4c 100644 --- a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py +++ b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py @@ -1680,3 +1680,57 @@ async def test_async_success_handler_preserves_response_cost_for_pass_through_en slo = logging_obj.model_call_details.get("standard_logging_object") assert slo is not None assert slo["response_cost"] > 0 + + +@pytest.mark.asyncio +async def test_async_success_handler_merges_response_cost_from_kwargs_for_passthrough(): + """ + Test that async_success_handler merges response_cost from **kwargs into + model_call_details for pass_through_endpoint calls. + + This verifies the fix for streaming pass-through endpoints where the + provider handler computes response_cost and returns it in kwargs, but + it was never being merged into model_call_details. + """ + logging_obj = LitellmLogging( + model="unknown", + messages=[], + stream=False, + call_type="pass_through_endpoint", + start_time=time.time(), + litellm_call_id="test-passthrough-123", + function_id="1245", + ) + + # Simulate update_environment_variables having set litellm_params with metadata + logging_obj.model_call_details["litellm_params"] = { + "metadata": { + "user_api_key_hash": "sk-hashed-test", + "user_api_key_alias": "my-test-key", + "user_api_key_team_id": "team-abc", + "user_api_key_user_id": "user-xyz", + } + } + + result = "test result" + + # Call async_success_handler with response_cost in kwargs + # (simulating what the Anthropic streaming handler returns) + await logging_obj.async_success_handler( + result=result, + start_time=time.time(), + end_time=time.time(), + cache_hit=False, + response_cost=0.0042, + model="claude-3-haiku-20240307", + ) + + # response_cost from kwargs should be merged into model_call_details + assert logging_obj.model_call_details.get("response_cost") == 0.0042 + + # standard_logging_object should exist and have the metadata + slo = logging_obj.model_call_details.get("standard_logging_object") + assert slo is not None + assert slo["metadata"].get("user_api_key_hash") == "sk-hashed-test" + assert slo["metadata"].get("user_api_key_alias") == "my-test-key" + assert slo["metadata"].get("user_api_key_team_id") == "team-abc" From b92f9cb35224a9c27778e122ed0bd6a21eb6814d Mon Sep 17 00:00:00 2001 From: Harshit Jain <48647625+Harshit28j@users.noreply.github.com> Date: Fri, 27 Feb 2026 22:10:33 +0530 Subject: [PATCH 3/4] Update tests/pass_through_unit_tests/test_unit_test_streaming.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- .../test_unit_test_streaming.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/pass_through_unit_tests/test_unit_test_streaming.py b/tests/pass_through_unit_tests/test_unit_test_streaming.py index 8b9633a20deb..f4f4f53dfe3b 100644 --- a/tests/pass_through_unit_tests/test_unit_test_streaming.py +++ b/tests/pass_through_unit_tests/test_unit_test_streaming.py @@ -174,11 +174,12 @@ async def mock_aiter_bytes(): # Verify async_success_handler was called with kwargs containing # the API key metadata from input_kwargs - if litellm_logging_obj.async_success_handler.called: - call_kwargs = litellm_logging_obj.async_success_handler.call_args - # The handler_kwargs are spread as **kwargs, check they include response_cost - # (set by the Anthropic handler) and that litellm_params metadata was preserved - assert call_kwargs is not None, "async_success_handler was called but with no args" + assert litellm_logging_obj.async_success_handler.called, \ + "async_success_handler should have been called after streaming completed" + call_kwargs = litellm_logging_obj.async_success_handler.call_args + # The handler_kwargs are spread as **kwargs, check they include response_cost + # (set by the Anthropic handler) and that litellm_params metadata was preserved + assert call_kwargs is not None, "async_success_handler was called but with no args" def test_convert_raw_bytes_to_str_lines(): From f67cb1a357aba903d637f16afdacd07976690abc Mon Sep 17 00:00:00 2001 From: Harshit Jain <48647625+Harshit28j@users.noreply.github.com> Date: Fri, 27 Feb 2026 22:10:41 +0530 Subject: [PATCH 4/4] Update tests/pass_through_unit_tests/test_unit_test_streaming.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- tests/pass_through_unit_tests/test_unit_test_streaming.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/pass_through_unit_tests/test_unit_test_streaming.py b/tests/pass_through_unit_tests/test_unit_test_streaming.py index f4f4f53dfe3b..7276a677b7c7 100644 --- a/tests/pass_through_unit_tests/test_unit_test_streaming.py +++ b/tests/pass_through_unit_tests/test_unit_test_streaming.py @@ -169,7 +169,6 @@ async def mock_aiter_bytes(): pass # Allow the asyncio.create_task to run - import asyncio await asyncio.sleep(0.5) # Verify async_success_handler was called with kwargs containing