diff --git a/pyproject.toml b/pyproject.toml index 2fca1c3a6..6781fcda6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "GitAuto" -version = "1.40.1" +version = "1.47.0" requires-python = ">=3.14" dependencies = [ "annotated-doc==0.0.4", diff --git a/services/github/token/test_get_installation_token.py b/services/github/token/test_get_installation_token.py index e917c5937..7a234f05e 100644 --- a/services/github/token/test_get_installation_token.py +++ b/services/github/token/test_get_installation_token.py @@ -124,6 +124,9 @@ def test_get_installation_access_token_403_without_suspension_message( mock_response.status_code = 403 mock_response.text = "Forbidden - different reason" mock_response.reason = "Forbidden" + # Explicit empty headers — otherwise MagicMock's auto-attributes make the + # rate-limit extractor see a phantom Retry-After (MagicMock.__float__ returns 1.0). + mock_response.headers = {} mock_error = requests.exceptions.HTTPError(response=mock_response) mock_error.response = mock_response mock_requests_post.return_value.raise_for_status.side_effect = mock_error @@ -146,6 +149,7 @@ def test_get_installation_access_token_other_http_error( mock_response.status_code = 500 mock_response.text = "Internal Server Error" mock_response.reason = "Internal Server Error" + mock_response.headers = {} mock_error = requests.exceptions.HTTPError(response=mock_response) mock_error.response = mock_response mock_requests_post.return_value.raise_for_status.side_effect = mock_error diff --git a/services/google_ai/chat_with_google.py b/services/google_ai/chat_with_google.py index 2f355cead..f5e3da248 100644 --- a/services/google_ai/chat_with_google.py +++ b/services/google_ai/chat_with_google.py @@ -58,12 +58,28 @@ def chat_with_google( content_list = [] if response.candidates: + logger.info( + "chat_with_google: response has %d candidate(s); parsing first", + len(response.candidates), + ) candidate = response.candidates[0] if 
candidate.content and candidate.content.parts: + logger.info( + "chat_with_google: candidate has %d part(s); iterating", + len(candidate.content.parts), + ) for part in candidate.content.parts: if part.text: + logger.info( + "chat_with_google: part is text (%d chars); appending to content_text", + len(part.text), + ) content_text += part.text elif part.function_call: + logger.info( + "chat_with_google: part is function_call=%s; building ToolCall", + part.function_call.name, + ) fc = part.function_call # Generate a tool_use ID matching Anthropic format tool_id = fc.id or f"toolu_{uuid.uuid4().hex[:24]}" @@ -77,6 +93,10 @@ def chat_with_google( # Build content list in Anthropic format if content_text: + logger.info( + "chat_with_google: assembling content_list with text block (%d chars)", + len(content_text), + ) content_list.append({"type": "text", "text": content_text}) for tc in tool_calls: diff --git a/services/google_ai/test_chat_with_google.py b/services/google_ai/test_chat_with_google.py index 5dcb81d5b..7a4c46acb 100644 --- a/services/google_ai/test_chat_with_google.py +++ b/services/google_ai/test_chat_with_google.py @@ -333,3 +333,45 @@ def test_integration_tool_call_with_real_tools(mock_insert): for tc in result.tool_calls: assert tc.id assert tc.name + + +@patch("services.google_ai.chat_with_google.insert_llm_request") +@patch("services.google_ai.chat_with_google.get_google_ai_client") +def test_429_is_not_retried_locally_bubbles_to_handle_exceptions( + mock_get_client, mock_insert +): + """Rate-limit retry is handled at the handle_exceptions layer (via + get_rate_limit_retry_after), not inside chat_with_google. A single 429 from + the SDK should propagate unchanged — the decorator picks it up, sleeps the + retry-after hint, and re-invokes the wrapper. 
Verify chat_with_google itself + does not swallow or loop on 429.""" + from google.genai import errors as google_errors + + err = google_errors.ClientError( + code=429, + response_json={ + "error": { + "code": 429, + "message": "quota exceeded. Please retry in 5s.", + "status": "RESOURCE_EXHAUSTED", + } + }, + ) + client = Mock() + client.models.generate_content.side_effect = err + mock_get_client.return_value = client + + with patch("utils.error.handle_exceptions.time.sleep"): + with pytest.raises(google_errors.ClientError): + chat_with_google( + messages=cast(list[MessageParam], [{"role": "user", "content": "hi"}]), + system_content="sys", + tools=[], + model_id=GoogleModelId.GEMMA_4_31B, + usage_id=1, + created_by="1:t", + ) + # handle_exceptions makes up to TRANSIENT_MAX_ATTEMPTS=3 total attempts (1 call + 2 retries), + # so the SDK gets called 3 times (honoring the 5s hint between each). + assert client.models.generate_content.call_count == 3 + mock_insert.assert_not_called() diff --git a/utils/error/fixtures/real_google_429.txt b/utils/error/fixtures/real_google_429.txt new file mode 100644 index 000000000..2d955701d --- /dev/null +++ b/utils/error/fixtures/real_google_429.txt @@ -0,0 +1 @@ +429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. 
\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 15, model: gemma-4-31b\nPlease retry in 59.739387544s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemma-4-31b'}, 'quotaValue': '15'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '59s'}]}} diff --git a/utils/error/get_rate_limit_retry_after.py b/utils/error/get_rate_limit_retry_after.py new file mode 100644 index 000000000..ead5e9fea --- /dev/null +++ b/utils/error/get_rate_limit_retry_after.py @@ -0,0 +1,64 @@ +import requests + +from utils.error.parse_github_rate_limit_headers import ( + parse_github_rate_limit_headers, +) +from utils.error.parse_google_retry_in_message import parse_google_retry_in_message +from utils.error.parse_retry_after_header import parse_retry_after_header +from utils.logging.logging_config import logger + + +def get_rate_limit_retry_after(err: Exception): + """Return the SDK's suggested retry delay in seconds when the error is a rate limit, None otherwise. + + Sentry AGENT-3K5/3K6/3K7/3K8/36M/36Q (Gemini free-tier 429 cascading through chat_with_google → chat_with_model → chat_with_agent → handle_webhook_event): Gemini embeds "Please retry in N.NNNs" in the error message body. + GitHub uses X-RateLimit-Reset/Retry-After headers. Anthropic uses retry-after and anthropic-ratelimit-* headers. + Rather than duplicate sleep+retry logic per SDK, return a single delay that handle_exceptions can honor uniformly. 
+ No upper bound is applied: honor whatever the server suggested. Lambda-timeout protection already exists at the handler layer via should_bail(). + """ + # requests.HTTPError (GitHub, generic 429 APIs) + if isinstance(err, requests.HTTPError): + logger.info("get_rate_limit_retry_after: dispatching requests.HTTPError branch") + response = getattr(err, "response", None) + status_code = getattr(response, "status_code", None) + if status_code not in (403, 429): + logger.info( + "get_rate_limit_retry_after: requests.HTTPError status=%s not in {403,429}", + status_code, + ) + return None + headers = getattr(response, "headers", None) if response is not None else None + if headers and "X-RateLimit-Remaining" in headers: + logger.info( + "get_rate_limit_retry_after: detected github rate-limit headers" + ) + return parse_github_rate_limit_headers(response) + logger.info( + "get_rate_limit_retry_after: no github-specific headers; using Retry-After path" + ) + return parse_retry_after_header(headers) + + # Anthropic RateLimitError / APIStatusError with status_code=429 + status_code = getattr(err, "status_code", None) + if isinstance(status_code, int) and status_code == 429: + logger.info( + "get_rate_limit_retry_after: dispatching anthropic status_code=429 branch" + ) + response = getattr(err, "response", None) + headers = getattr(response, "headers", None) if response is not None else None + logger.info( + "get_rate_limit_retry_after: delegating anthropic delay extraction to parse_retry_after_header" + ) + return parse_retry_after_header(headers) + + # Google GenAI ClientError with code=429 (message body carries the hint) + code = getattr(err, "code", None) + if code == 429: + logger.info("get_rate_limit_retry_after: dispatching google code=429 branch") + return parse_google_retry_in_message(err) + + logger.info( + "get_rate_limit_retry_after: %s is not a recognized rate-limit error", + type(err).__name__, + ) + return None diff --git a/utils/error/handle_exceptions.py 
b/utils/error/handle_exceptions.py index 51dc8edac..aa68b6402 100644 --- a/utils/error/handle_exceptions.py +++ b/utils/error/handle_exceptions.py @@ -11,6 +11,7 @@ # Third party imports import requests +from utils.error.get_rate_limit_retry_after import get_rate_limit_retry_after from utils.error.handle_generic_error import handle_generic_error from utils.error.handle_http_error import handle_http_error from utils.error.handle_json_error import handle_json_error @@ -92,6 +93,27 @@ async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R: logger.info("%s invoking attempt %d", func.__name__, attempt) return await func(*args, **kwargs) except requests.HTTPError as err: + rate_limit_delay = get_rate_limit_retry_after(err) + if ( + rate_limit_delay is not None + and remaining_transient_retries > 0 + ): + logger.warning( + "%s rate-limited via HTTPError on attempt %d, sleeping %.2fs", + func.__name__, + attempt, + rate_limit_delay, + ) + remaining_transient_retries -= 1 + await asyncio.sleep(rate_limit_delay) + logger.info( + "%s retrying after rate-limit sleep", func.__name__ + ) + continue + logger.info( + "%s HTTPError not rate-limited or retries exhausted; handing off", + func.__name__, + ) result, retried = handle_http_error( err, func.__name__, @@ -136,6 +158,23 @@ async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R: ) return cast(R, error_return) except Exception as err: + rate_limit_delay = get_rate_limit_retry_after(err) + if ( + rate_limit_delay is not None + and remaining_transient_retries > 0 + ): + logger.warning( + "%s rate-limited on attempt %d, sleeping %.2fs", + func.__name__, + attempt, + rate_limit_delay, + ) + remaining_transient_retries -= 1 + await asyncio.sleep(rate_limit_delay) + logger.info( + "%s retrying after rate-limit sleep", func.__name__ + ) + continue if remaining_transient_retries > 0 and is_transient_error(err): logger.info( "%s transient-error branch taken", func.__name__ @@ -191,6 +230,22 @@ def wrapper(*args: 
P.args, **kwargs: P.kwargs) -> R: logger.info("%s invoking attempt %d", func.__name__, attempt) return func(*args, **kwargs) except requests.HTTPError as err: + rate_limit_delay = get_rate_limit_retry_after(err) + if rate_limit_delay is not None and remaining_transient_retries > 0: + logger.warning( + "%s rate-limited via HTTPError on attempt %d, sleeping %.2fs", + func.__name__, + attempt, + rate_limit_delay, + ) + remaining_transient_retries -= 1 + time.sleep(rate_limit_delay) + logger.info("%s retrying after rate-limit sleep", func.__name__) + continue + logger.info( + "%s HTTPError not rate-limited or retries exhausted; handing off", + func.__name__, + ) result, retried = handle_http_error( err, func.__name__, @@ -220,6 +275,18 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: ), ) except Exception as err: + rate_limit_delay = get_rate_limit_retry_after(err) + if rate_limit_delay is not None and remaining_transient_retries > 0: + logger.warning( + "%s rate-limited on attempt %d, sleeping %.2fs", + func.__name__, + attempt, + rate_limit_delay, + ) + remaining_transient_retries -= 1 + time.sleep(rate_limit_delay) + logger.info("%s retrying after rate-limit sleep", func.__name__) + continue if remaining_transient_retries > 0 and is_transient_error(err): logger.info("%s transient-error branch taken", func.__name__) backoff = TRANSIENT_BACKOFF_SECONDS * attempt diff --git a/utils/error/handle_github_rate_limit.py b/utils/error/handle_github_rate_limit.py deleted file mode 100644 index 1d4ccc5f7..000000000 --- a/utils/error/handle_github_rate_limit.py +++ /dev/null @@ -1,65 +0,0 @@ -import time -from typing import Any, Callable - -import requests -import sentry_sdk - -from utils.logging.logging_config import logger - - -def handle_github_rate_limit( - err: requests.HTTPError, - func_name: str, - reason: str, - text: str, - raise_on_error: bool, - retry_callback: Callable[[], Any], -): - """Handle GitHub's 403/429 responses for primary and secondary rate limits. 
- - Returns (retry_result, True) when a retry was performed; returns None when - the caller should fall through to generic Sentry-capture-and-return - behaviour (non-rate-limit 403/429, or raise_on_error already raised).""" - assert err.response is not None - logger.error("err.response.headers: %s", err.response.headers) - limit = int(err.response.headers["X-RateLimit-Limit"]) - remaining = int(err.response.headers["X-RateLimit-Remaining"]) - used = int(err.response.headers["X-RateLimit-Used"]) - - if remaining == 0: - logger.info("%s GitHub primary rate limit exhausted", func_name) - reset_ts = int(err.response.headers.get("X-RateLimit-Reset", 0)) - current_ts = int(time.time()) - wait_time = reset_ts - current_ts - - if wait_time <= 0: - logger.info("%s rate-limit reset already past, waiting 1s", func_name) - wait_time = 1 - else: - logger.info("%s waiting %ds for rate-limit reset", func_name, wait_time) - wait_time = wait_time + 5 - - err_msg = f"{func_name} encountered a GitHubPrimaryRateLimitError: {err}. Retrying after {wait_time} seconds. Limit: {limit}, Remaining: {remaining}, Used: {used}. Reason: {reason}. Text: {text}" - logger.error(err_msg) - time.sleep(wait_time) - logger.info("%s invoking primary-rate-limit retry", func_name) - return retry_callback(), True - - if "exceeded a secondary rate limit" in err.response.text.lower(): - logger.info("%s GitHub secondary rate limit hit", func_name) - retry_after = int(err.response.headers.get("Retry-After", 60)) - err_msg = f"{func_name} encountered a GitHubSecondaryRateLimitError: {err}. Retrying after {retry_after} seconds. Limit: {limit}, Remaining: {remaining}, Used: {used}. Reason: {reason}. Text: {text}" - logger.error(err_msg) - time.sleep(retry_after) - logger.info("%s invoking secondary-rate-limit retry", func_name) - return retry_callback(), True - - err_msg = f"{func_name} encountered an HTTPError: {err}. Limit: {limit}, Remaining: {remaining}, Used: {used}. Reason: {reason}. 
Text: {text}" - sentry_sdk.capture_exception(err) - logger.error(err_msg) - if raise_on_error: - logger.error("%s re-raising 403/429 HTTPError", func_name) - raise err - - logger.info("%s 403/429 non-rate-limit, falling through to default", func_name) - return None diff --git a/utils/error/handle_http_error.py b/utils/error/handle_http_error.py index b9c66eff5..0c5b1c1da 100644 --- a/utils/error/handle_http_error.py +++ b/utils/error/handle_http_error.py @@ -4,7 +4,6 @@ import requests import sentry_sdk -from utils.error.handle_github_rate_limit import handle_github_rate_limit from utils.error.is_server_error import is_server_error from utils.logging.logging_config import logger @@ -19,6 +18,8 @@ def handle_http_error( error_return: Any, retry_callback: Callable[[], Any], ): + # Rate-limit retry (github primary/secondary, generic Retry-After) is handled at the outer handle_exceptions level via get_rate_limit_retry_after. By the time we get here, a rate-limited HTTPError means the retry budget was already exhausted — treat it like any other HTTPError. 
+ _ = retry_callback # kept in signature for backward-compat with handle_exceptions if err.response is None: logger.info("%s HTTPError has no response object", func_name) if raise_on_error: @@ -51,27 +52,17 @@ def handle_http_error( ) logger.error("reason: %s, text: %s, status_code: %s", reason, text, status_code) - if api_type == "github" and status_code in {403, 429}: - logger.info("%s dispatching to github rate-limit handler", func_name) - retry_result = handle_github_rate_limit( - err, func_name, reason, text, raise_on_error, retry_callback - ) - if retry_result is not None: - logger.info("%s github 403/429 returned retry result", func_name) - return retry_result - - elif api_type == "web_search" and status_code == 429: + if api_type == "web_search" and status_code == 429: logger.info("%s web_search hit 429, raising", func_name) err_msg = f"Web Search Rate Limit in {func_name}()" logger.error(err_msg) logger.error("err.response.headers: %s", err.response.headers) raise err - else: - logger.info("%s reporting HTTPError to Sentry", func_name) - err_msg = f"{func_name} encountered an HTTPError: {err}\n\nArgs: {json.dumps(log_args, indent=2, default=str)}\n\nKwargs: {json.dumps(log_kwargs, indent=2, default=str)}\n\nReason: {reason}\n\nText: {text}" - sentry_sdk.capture_exception(err) - logger.error(err_msg) + logger.info("%s reporting HTTPError to Sentry", func_name) + err_msg = f"{func_name} encountered an HTTPError: {err}\n\nArgs: {json.dumps(log_args, indent=2, default=str)}\n\nKwargs: {json.dumps(log_kwargs, indent=2, default=str)}\n\nReason: {reason}\n\nText: {text}" + sentry_sdk.capture_exception(err) + logger.error(err_msg) if raise_on_error: logger.error("%s HTTPError path re-raising", func_name) diff --git a/utils/error/parse_github_rate_limit_headers.py b/utils/error/parse_github_rate_limit_headers.py new file mode 100644 index 000000000..50ca05bf1 --- /dev/null +++ b/utils/error/parse_github_rate_limit_headers.py @@ -0,0 +1,79 @@ +import time + +from 
utils.error.parse_retry_after_header import parse_retry_after_header +from utils.logging.logging_config import logger + + +def parse_github_rate_limit_headers(response): + """Parse GitHub's rate-limit headers into a retry delay in seconds, or None when the response is not a github rate limit. + + GitHub signals a primary rate limit by `X-RateLimit-Remaining: 0`, with `X-RateLimit-Reset` giving the unix timestamp when the quota refills. + A secondary (abuse) rate limit uses the standard Retry-After header instead. + Both shapes can coexist. If this function sees a non-zero remaining, it falls back to Retry-After (secondary limit case). + We add a 5s buffer on top of the reset timestamp to guard against small clock skew between the Lambda and GitHub's servers — without it, a retry that fires exactly at reset time occasionally still hits the 403. + """ + headers = getattr(response, "headers", None) + if not headers: + logger.info("parse_github_rate_limit_headers: github response has no headers") + return None + + remaining_raw = headers.get("X-RateLimit-Remaining") + if remaining_raw is None: + logger.info( + "parse_github_rate_limit_headers: no X-RateLimit-Remaining header; not a github rate limit" + ) + return None + + try: + remaining = int(remaining_raw) + except (TypeError, ValueError): + logger.info( + "parse_github_rate_limit_headers: X-RateLimit-Remaining=%r unparseable; ignoring", + remaining_raw, + ) + return None + + if remaining > 0: + logger.info( + "parse_github_rate_limit_headers: X-RateLimit-Remaining=%d > 0; trying Retry-After for secondary limit", + remaining, + ) + return parse_retry_after_header(headers) + + reset_raw = headers.get("X-RateLimit-Reset") + if reset_raw is None: + logger.info( + "parse_github_rate_limit_headers: primary rate limit but no X-RateLimit-Reset header; falling back to Retry-After" + ) + return parse_retry_after_header(headers) + + try: + reset_ts = int(reset_raw) + except (TypeError, ValueError): + logger.info( + 
"parse_github_rate_limit_headers: X-RateLimit-Reset=%r unparseable; ignoring", + reset_raw, + ) + return None + + delay = float(reset_ts - int(time.time())) + if delay > 0: + logger.info( + "parse_github_rate_limit_headers: primary rate limit resets in %.2fs; adding 5s buffer", + delay, + ) + delay += 5.0 + else: + logger.info( + "parse_github_rate_limit_headers: rate-limit reset already past; using 1s minimum" + ) + delay = 1.0 + limit = headers.get("X-RateLimit-Limit", "?") + used = headers.get("X-RateLimit-Used", "?") + logger.info( + "parse_github_rate_limit_headers: github primary limit exhausted (limit=%s used=%s); honoring %.2fs", + limit, + used, + delay, + ) + return delay diff --git a/utils/error/parse_google_retry_in_message.py b/utils/error/parse_google_retry_in_message.py new file mode 100644 index 000000000..f83afbae4 --- /dev/null +++ b/utils/error/parse_google_retry_in_message.py @@ -0,0 +1,33 @@ +import re + +from utils.logging.logging_config import logger + +# Matches "retry in 3.337s" or "retry in 3s" case-insensitively. +GOOGLE_RETRY_IN_RE = re.compile(r"retry in ([\d.]+)s", re.IGNORECASE) + + +def parse_google_retry_in_message(err: Exception): + """Return the retry delay in seconds extracted from a Gemini 429 message, or None. + + Unlike GitHub or Anthropic, the Gemini SDK surfaces its retry hint in the error message body rather than an HTTP header. + The format is stable ("Please retry in 3.337071738s.") so a simple regex is enough. + If the hint is absent (older API versions or certain quota classes), return None and let the caller fall back to another model. 
+ """ + match = GOOGLE_RETRY_IN_RE.search(str(err)) + if not match: + logger.info( + "parse_google_retry_in_message: google 429 message lacked 'retry in Ns' hint" + ) + return None + + try: + delay = float(match.group(1)) + except ValueError: + logger.info( + "parse_google_retry_in_message: google retry-in value %r unparseable", + match.group(1), + ) + return None + + logger.info("parse_google_retry_in_message: extracted %.2fs retry delay", delay) + return delay diff --git a/utils/error/parse_retry_after_header.py b/utils/error/parse_retry_after_header.py new file mode 100644 index 000000000..90cff5479 --- /dev/null +++ b/utils/error/parse_retry_after_header.py @@ -0,0 +1,30 @@ +from utils.logging.logging_config import logger + + +def parse_retry_after_header(headers): + """Parse an HTTP Retry-After header value (RFC 7231) and return it as float seconds, or None when absent/unparseable. + + Used by GitHub's secondary rate limit, Anthropic's RateLimitError, and any generic 429 response that sets Retry-After. + The spec allows either a non-negative integer (seconds) or an HTTP-date; in practice SDKs almost always return seconds. + We accept numeric values only — the date-form is uncommon and handling it would mean parsing three different datetime conventions for a code path we have never seen in production. 
+ """ + if not headers: + logger.info("parse_retry_after_header: no headers object on response") + return None + + raw = headers.get("Retry-After") or headers.get("retry-after") + if raw is None: + logger.info("parse_retry_after_header: no Retry-After header present") + return None + + try: + delay = float(raw) + except (TypeError, ValueError): + logger.info( + "parse_retry_after_header: Retry-After=%r is not a number; ignoring (date-form unsupported)", + raw, + ) + return None + + logger.info("parse_retry_after_header: parsed %.2fs from Retry-After", delay) + return delay diff --git a/utils/error/test_get_rate_limit_retry_after.py b/utils/error/test_get_rate_limit_retry_after.py new file mode 100644 index 000000000..e2c956844 --- /dev/null +++ b/utils/error/test_get_rate_limit_retry_after.py @@ -0,0 +1,182 @@ +import json +import time +from pathlib import Path +from unittest.mock import Mock + +import pytest +import requests +from google.genai import errors as google_errors + +from utils.error.get_rate_limit_retry_after import get_rate_limit_retry_after + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + + +def _http_error(status, headers=None): + response = Mock() + response.status_code = status + response.headers = headers or {} + err = requests.HTTPError(response=response) + return err + + +def test_returns_none_for_unrelated_exception(): + assert get_rate_limit_retry_after(ValueError("not a rate limit")) is None + + +def test_requests_http_error_non_rate_limit_status_returns_none(): + err = _http_error(500) + assert get_rate_limit_retry_after(err) is None + + +def test_requests_http_error_with_retry_after_header_returns_delay(): + err = _http_error(429, headers={"Retry-After": "12"}) + assert get_rate_limit_retry_after(err) == 12.0 + + +def test_requests_http_error_with_lowercase_retry_after_header_returns_delay(): + err = _http_error(429, headers={"retry-after": "3.5"}) + assert get_rate_limit_retry_after(err) == 3.5 + + +def 
test_requests_http_error_without_retry_after_returns_none(): + err = _http_error(429, headers={}) + assert get_rate_limit_retry_after(err) is None + + +def test_github_primary_rate_limit_uses_reset_header_with_buffer(): + reset_ts = int(time.time()) + 20 + err = _http_error( + 403, + headers={ + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": str(reset_ts), + "X-RateLimit-Limit": "5000", + "X-RateLimit-Used": "5000", + }, + ) + delay = get_rate_limit_retry_after(err) + assert delay is not None + # delay = reset_ts - now + 5s buffer; jitter within a couple seconds is OK + assert 20.0 <= delay <= 30.0 + + +def test_github_primary_rate_limit_past_reset_uses_one_second(): + reset_ts = int(time.time()) - 60 + err = _http_error( + 403, + headers={ + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": str(reset_ts), + }, + ) + assert get_rate_limit_retry_after(err) == 1.0 + + +def test_github_secondary_rate_limit_remaining_nonzero_uses_retry_after(): + err = _http_error( + 403, + headers={ + "X-RateLimit-Remaining": "4000", + "Retry-After": "60", + }, + ) + assert get_rate_limit_retry_after(err) == 60.0 + + +def test_anthropic_rate_limit_error_reads_retry_after_header(): + err = Mock(spec=Exception) + err.status_code = 429 + err.response = Mock() + err.response.headers = {"retry-after": "45"} + assert get_rate_limit_retry_after(err) == 45.0 + + +def test_anthropic_429_without_retry_after_returns_none(): + err = Mock(spec=Exception) + err.status_code = 429 + err.response = Mock() + err.response.headers = {} + assert get_rate_limit_retry_after(err) is None + + +def test_anthropic_non_429_status_returns_none(): + err = Mock(spec=Exception) + err.status_code = 500 + err.response = Mock() + err.response.headers = {"retry-after": "45"} + assert get_rate_limit_retry_after(err) is None + + +def test_google_client_error_with_retry_hint_returns_delay(): + err = google_errors.ClientError( + code=429, + response_json={ + "error": { + "code": 429, + "message": "You exceeded your 
current quota. Please retry in 3.337071738s.", + "status": "RESOURCE_EXHAUSTED", + } + }, + ) + delay = get_rate_limit_retry_after(err) + assert delay == pytest.approx(3.337071738, rel=1e-6) + + +def test_google_client_error_without_retry_hint_returns_none(): + err = google_errors.ClientError( + code=429, + response_json={ + "error": { + "code": 429, + "message": "You exceeded your current quota.", + "status": "RESOURCE_EXHAUSTED", + } + }, + ) + assert get_rate_limit_retry_after(err) is None + + +def test_google_non_429_code_returns_none(): + err = google_errors.ClientError( + code=400, + response_json={"error": {"code": 400, "message": "bad request"}}, + ) + assert get_rate_limit_retry_after(err) is None + + +def test_real_google_429_payload_returns_correct_delay(): + """Fixture is the verbatim 429 line pulled from CloudWatch for the + gitautoai/website incident on 2026-04-20 16:23 UTC (chat_with_google log + entry). See fixtures/real_google_429.txt. The parser only needs the + "Please retry in N.NNNs" substring — the surrounding details[] payload + (Help link, QuotaFailure, RetryInfo) is preserved so the fixture doubles + as a reference for future rate-limit work.""" + fixture_path = FIXTURES_DIR / "real_google_429.txt" + raw_payload = fixture_path.read_text().strip() + assert raw_payload.startswith("429 RESOURCE_EXHAUSTED. ") + hint = "Please retry " + "in " + "59.739387544s" + assert raw_payload.count(hint) == 1 + # The "{...}" after "429 RESOURCE_EXHAUSTED. " is a Python repr with \n + # escapes, not JSON. Feed the exact message text through the SDK so + # str(err) matches what the Lambda logged in production. + message = ( + "You exceeded your current quota, please check your plan and billing details. " + "For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. " + "To monitor your current usage, head to: https://ai.dev/rate-limit. 
\n" + "* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, " + "limit: 15, model: gemma-4-31b\nPlease retry in 59.739387544s." + ) + response_json = { + "error": { + "code": 429, + "message": message, + "status": "RESOURCE_EXHAUSTED", + } + } + err = google_errors.ClientError(code=429, response_json=response_json) + hint = "Please retry " + "in " + "59.739387544s" + assert str(err).count(hint) == 1 + delay = get_rate_limit_retry_after(err) + assert delay == pytest.approx(59.739387544, rel=1e-6) + _ = json # keeps import usable if fixture changes shape later diff --git a/utils/error/test_handle_exceptions.py b/utils/error/test_handle_exceptions.py index c7b8f35f2..1fbf7259d 100644 --- a/utils/error/test_handle_exceptions.py +++ b/utils/error/test_handle_exceptions.py @@ -724,3 +724,27 @@ def real_bug(): with patch("utils.error.handle_exceptions.time.sleep"): assert real_bug() == "DEFAULT" assert attempts["count"] == 1 + + +def test_decorator_honors_rate_limit_retry_after_hint(): + """When a decorated function raises a rate-limit error carrying a retry-after hint, the decorator sleeps that long and retries under the existing TRANSIENT_MAX_ATTEMPTS budget (see get_rate_limit_retry_after).""" + attempts = {"count": 0} + + def build_429(): + response = MagicMock() + response.status_code = 429 + response.headers = {"Retry-After": "7"} + return requests.HTTPError(response=response) + + @handle_exceptions(default_return_value="DEFAULT", raise_on_error=False) + def rate_limited_then_ok(): + attempts["count"] += 1 + if attempts["count"] == 1: + raise build_429() + return "OK" + + slept = [] + with patch("utils.error.handle_exceptions.time.sleep", side_effect=slept.append): + assert rate_limited_then_ok() == "OK" + assert attempts["count"] == 2 + assert slept == [7.0] diff --git a/utils/error/test_handle_github_rate_limit.py b/utils/error/test_handle_github_rate_limit.py deleted file mode 100644 index e52513c33..000000000 --- 
a/utils/error/test_handle_github_rate_limit.py +++ /dev/null @@ -1,111 +0,0 @@ -from unittest.mock import Mock, patch - -import requests - -from utils.error.handle_github_rate_limit import handle_github_rate_limit - - -def _github_http_error( - status_code: int, - limit: int, - remaining: int, - used: int, - reset_ts: int = 0, - retry_after: int = 60, - body: str = "", -): - response = requests.Response() - response.status_code = status_code - response.headers["X-RateLimit-Limit"] = str(limit) - response.headers["X-RateLimit-Remaining"] = str(remaining) - response.headers["X-RateLimit-Used"] = str(used) - response.headers["X-RateLimit-Reset"] = str(reset_ts) - response.headers["Retry-After"] = str(retry_after) - response._content = body.encode("utf-8") # pylint: disable=protected-access - response.reason = "Forbidden" if status_code == 403 else "Too Many Requests" - err = requests.HTTPError() - err.response = response - return err - - -def test_primary_rate_limit_retries_via_callback(): - err = _github_http_error(403, limit=5000, remaining=0, used=5000) - retry_callback = Mock(return_value="RETRIED") - - with patch("utils.error.handle_github_rate_limit.time.sleep"): - result = handle_github_rate_limit( - err, - func_name="test_func", - reason="Forbidden", - text="rate limit exceeded", - raise_on_error=False, - retry_callback=retry_callback, - ) - - assert result == ("RETRIED", True) - retry_callback.assert_called_once_with() - - -def test_secondary_rate_limit_retries_via_callback(): - err = _github_http_error( - 403, - limit=5000, - remaining=100, - used=4900, - body="You have exceeded a secondary rate limit", - ) - retry_callback = Mock(return_value="RETRIED") - - with patch("utils.error.handle_github_rate_limit.time.sleep"): - result = handle_github_rate_limit( - err, - func_name="test_func", - reason="Forbidden", - text="exceeded a secondary rate limit", - raise_on_error=False, - retry_callback=retry_callback, - ) - - assert result == ("RETRIED", True) - 
retry_callback.assert_called_once_with() - - -def test_non_rate_limit_403_returns_none_for_fall_through(): - """403 without rate-limit signatures should fall through to the caller's - generic Sentry-capture path.""" - err = _github_http_error( - 403, limit=5000, remaining=100, used=4900, body="Forbidden" - ) - - with patch("utils.error.handle_github_rate_limit.sentry_sdk"): - result = handle_github_rate_limit( - err, - func_name="test_func", - reason="Forbidden", - text="not rate limited", - raise_on_error=False, - retry_callback=Mock(), - ) - - assert result is None - - -def test_non_rate_limit_403_re_raises_when_raise_on_error_true(): - err = _github_http_error( - 403, limit=5000, remaining=100, used=4900, body="Forbidden" - ) - - with patch("utils.error.handle_github_rate_limit.sentry_sdk"): - try: - handle_github_rate_limit( - err, - func_name="test_func", - reason="Forbidden", - text="not rate limited", - raise_on_error=True, - retry_callback=Mock(), - ) - except requests.HTTPError as raised: - assert raised is err - else: - raise AssertionError("expected HTTPError to be re-raised") diff --git a/utils/error/test_handle_http_error.py b/utils/error/test_handle_http_error.py index 40082252f..13eb5e926 100644 --- a/utils/error/test_handle_http_error.py +++ b/utils/error/test_handle_http_error.py @@ -73,15 +73,20 @@ def test_server_error_returns_default_without_sentry(): mock_sentry.capture_exception.assert_not_called() -def test_github_rate_limit_is_delegated_and_retries(): +def test_github_rate_limit_passes_through_to_generic_handling(): + """Rate-limit retry now lives in handle_exceptions via get_rate_limit_retry_after. + By the time a 403 with X-RateLimit headers reaches handle_http_error, the retry + budget is already exhausted — treat it like any other client error: capture to + Sentry and return the default. 
retry_callback is kept in the signature but is + not invoked.""" err = _http_error(403, body="rate limit exceeded", reason="Forbidden") err.response.headers["X-RateLimit-Limit"] = "5000" err.response.headers["X-RateLimit-Remaining"] = "0" err.response.headers["X-RateLimit-Used"] = "5000" err.response.headers["X-RateLimit-Reset"] = "0" - retry_callback = Mock(return_value="RETRIED") + retry_callback = Mock() - with patch("utils.error.handle_github_rate_limit.time.sleep"): + with patch("utils.error.handle_http_error.sentry_sdk") as mock_sentry: result = handle_http_error( err, func_name="test", @@ -93,8 +98,9 @@ def test_github_rate_limit_is_delegated_and_retries(): retry_callback=retry_callback, ) - assert result == ("RETRIED", True) - retry_callback.assert_called_once_with() + assert result == ("DEFAULT", False) + retry_callback.assert_not_called() + mock_sentry.capture_exception.assert_called_once_with(err) def test_other_http_error_captures_sentry_and_returns_default(): diff --git a/utils/error/test_parse_github_rate_limit_headers.py b/utils/error/test_parse_github_rate_limit_headers.py new file mode 100644 index 000000000..7752d1e93 --- /dev/null +++ b/utils/error/test_parse_github_rate_limit_headers.py @@ -0,0 +1,93 @@ +import time +from unittest.mock import Mock + +from utils.error.parse_github_rate_limit_headers import parse_github_rate_limit_headers + + +def _response(headers): + r = Mock() + r.headers = headers + return r + + +def test_returns_none_when_response_has_no_headers_attr(): + r = Mock(spec=[]) + assert parse_github_rate_limit_headers(r) is None + + +def test_returns_none_when_headers_is_empty(): + assert parse_github_rate_limit_headers(_response({})) is None + + +def test_returns_none_when_no_rate_limit_remaining_header(): + assert ( + parse_github_rate_limit_headers(_response({"Content-Type": "application/json"})) + is None + ) + + +def test_returns_none_when_remaining_is_unparseable(): + assert ( + parse_github_rate_limit_headers( + 
_response({"X-RateLimit-Remaining": "not-a-number"}) + ) + is None + ) + + +def test_falls_back_to_retry_after_when_remaining_nonzero(): + # Non-zero remaining = secondary (abuse) rate limit; use Retry-After + response = _response({"X-RateLimit-Remaining": "4000", "Retry-After": "60"}) + assert parse_github_rate_limit_headers(response) == 60.0 + + +def test_secondary_rate_limit_without_retry_after_returns_none(): + response = _response({"X-RateLimit-Remaining": "4000"}) + assert parse_github_rate_limit_headers(response) is None + + +def test_primary_rate_limit_uses_reset_with_buffer(): + reset_ts = int(time.time()) + 30 + response = _response( + { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": str(reset_ts), + "X-RateLimit-Limit": "5000", + "X-RateLimit-Used": "5000", + } + ) + delay = parse_github_rate_limit_headers(response) + assert delay is not None + # reset_ts - now + 5s buffer, allowing a few seconds of wall-clock jitter + assert 30.0 <= delay <= 40.0 + + +def test_primary_rate_limit_past_reset_uses_one_second_floor(): + reset_ts = int(time.time()) - 120 + response = _response( + { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": str(reset_ts), + } + ) + assert parse_github_rate_limit_headers(response) == 1.0 + + +def test_primary_rate_limit_without_reset_falls_back_to_retry_after(): + response = _response( + { + "X-RateLimit-Remaining": "0", + "Retry-After": "15", + } + ) + assert parse_github_rate_limit_headers(response) == 15.0 + + +def test_primary_rate_limit_with_unparseable_reset_returns_none(): + response = _response( + { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "garbage", + } + ) + assert parse_github_rate_limit_headers(response) is None diff --git a/utils/error/test_parse_google_retry_in_message.py b/utils/error/test_parse_google_retry_in_message.py new file mode 100644 index 000000000..3e955f41f --- /dev/null +++ b/utils/error/test_parse_google_retry_in_message.py @@ -0,0 +1,39 @@ +import pytest +from google.genai import 
errors as google_errors + +from utils.error.parse_google_retry_in_message import parse_google_retry_in_message + + +def _client_error(message): + return google_errors.ClientError( + code=429, + response_json={ + "error": {"code": 429, "message": message, "status": "RESOURCE_EXHAUSTED"} + }, + ) + + +def test_extracts_retry_delay_from_fractional_seconds(): + err = _client_error("Please retry in 3.337071738s.") + assert parse_google_retry_in_message(err) == pytest.approx(3.337071738, rel=1e-6) + + +def test_extracts_retry_delay_from_integer_seconds(): + err = _client_error("Please retry in 60s.") + assert parse_google_retry_in_message(err) == 60.0 + + +def test_case_insensitive_match(): + err = _client_error("Please RETRY IN 5.5s now.") + assert parse_google_retry_in_message(err) == 5.5 + + +def test_returns_none_when_hint_missing(): + err = _client_error("You exceeded your current quota.") + assert parse_google_retry_in_message(err) is None + + +def test_returns_none_for_different_phrasing(): + # Parser specifically looks for "retry in N.NNNs"; a "wait Ns" variant doesn't match. 
+ err = _client_error("Please wait 10s before retrying.") + assert parse_google_retry_in_message(err) is None diff --git a/utils/error/test_parse_retry_after_header.py b/utils/error/test_parse_retry_after_header.py new file mode 100644 index 000000000..2e4838cde --- /dev/null +++ b/utils/error/test_parse_retry_after_header.py @@ -0,0 +1,44 @@ +from utils.error.parse_retry_after_header import parse_retry_after_header + + +def test_returns_none_when_headers_is_none(): + assert parse_retry_after_header(None) is None + + +def test_returns_none_when_headers_is_empty_dict(): + assert parse_retry_after_header({}) is None + + +def test_returns_none_when_retry_after_missing(): + assert parse_retry_after_header({"Content-Type": "application/json"}) is None + + +def test_parses_integer_seconds_from_canonical_header(): + assert parse_retry_after_header({"Retry-After": "30"}) == 30.0 + + +def test_parses_float_seconds(): + assert parse_retry_after_header({"Retry-After": "3.5"}) == 3.5 + + +def test_parses_lowercase_header_name(): + assert parse_retry_after_header({"retry-after": "10"}) == 10.0 + + +def test_prefers_canonical_case_over_lowercase(): + # Canonical "Retry-After" wins when both are set (shouldn't happen in real + # responses, but documents the lookup order). + headers = {"Retry-After": "5", "retry-after": "99"} + assert parse_retry_after_header(headers) == 5.0 + + +def test_returns_none_when_value_is_http_date(): + # RFC 7231 allows HTTP-date but we only accept numeric seconds. + assert ( + parse_retry_after_header({"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}) + is None + ) + + +def test_returns_none_when_value_is_garbage(): + assert parse_retry_after_header({"Retry-After": "not-a-number"}) is None diff --git a/uv.lock b/uv.lock index d63492239..98ed88e4a 100644 --- a/uv.lock +++ b/uv.lock @@ -596,7 +596,7 @@ wheels = [ [[package]] name = "gitauto" -version = "1.40.1" +version = "1.47.0" source = { virtual = "." } dependencies = [ { name = "annotated-doc" },