diff --git a/CLAUDE.md b/CLAUDE.md index 8586fc006..9ec28bb00 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -41,8 +41,12 @@ sentry-cli issues list --org gitauto-ai --project agent --query "search terms" python3 scripts/sentry/get_issue.py AGENT-20N ``` +Don't pass `--max-rows`. The CLI defaults already return the full list; capping it means you miss issues that should be resolved but sit below the cap. + Then **just `Read` `/tmp/sentry_agent-20n.json`** — it's small. Don't pipe through `python -m json.tool`, `jq`, or inline `python3 -c`; those fail on quoting and waste retries. +Don't pipe `resolve_issue.py` output through `tail` — each issue prints one line and any failure gets hidden if the truncation window is wrong. + ### AWS CLI Configured for us-west-1. **Always `--start-from-head`** with `get-log-events`. diff --git a/pyproject.toml b/pyproject.toml index 421b8dfe8..d854aa62b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "GitAuto" -version = "1.52.0" +version = "1.55.0" requires-python = ">=3.14" dependencies = [ "annotated-doc==0.0.4", diff --git a/services/aws/disable_scheduler.py b/services/aws/disable_scheduler.py new file mode 100644 index 000000000..b905cb66b --- /dev/null +++ b/services/aws/disable_scheduler.py @@ -0,0 +1,32 @@ +# pyright: reportAssignmentType=false +# Third party imports +from botocore.exceptions import ClientError +from mypy_boto3_scheduler.type_defs import UpdateScheduleInputTypeDef + +# Local imports +from services.aws.clients import scheduler_client +from utils.error.handle_exceptions import handle_exceptions +from utils.logging.logging_config import logger + +# Derived from the SDK, not hardcoded — stays in sync with update_schedule's signature. 
# --- services/aws/disable_scheduler.py (rest of the module) ---
ALLOWED_UPDATE_FIELDS = frozenset(UpdateScheduleInputTypeDef.__annotations__)


@handle_exceptions(default_return_value=False, raise_on_error=False)
def disable_scheduler(schedule_name: str):
    """Switch the named EventBridge schedule to State="DISABLED".

    The schedule is read with get_schedule and written back with update_schedule,
    forwarding only the keys listed in ALLOWED_UPDATE_FIELDS and overriding State.

    Returns True after the update, and also when get_schedule answers
    ResourceNotFoundException (there is nothing left to disable). Any other
    ClientError is logged and re-raised into the handle_exceptions decorator,
    which is configured with default_return_value=False.
    """
    try:
        schedule = scheduler_client.get_schedule(Name=schedule_name)
    except ClientError as exc:
        error_code = exc.response.get("Error", {}).get("Code")
        if error_code != "ResourceNotFoundException":
            logger.error("EventBridge Scheduler get_schedule failed: %s", exc)
            raise
        logger.info("EventBridge Scheduler not found: %s", schedule_name)
        return True

    # Keep only keys that update_schedule declares; State is forced last so it
    # always wins over the value read from the schedule.
    payload: UpdateScheduleInputTypeDef = {
        **{
            field: value
            for field, value in schedule.items()
            if field in ALLOWED_UPDATE_FIELDS
        },
        "State": "DISABLED",
    }
    scheduler_client.update_schedule(**payload)
    logger.info("Disabled EventBridge Scheduler: %s", schedule_name)
    return True


# --- services/aws/test_disable_scheduler.py ---
# pylint: disable=unused-argument
# pyright: reportUnusedVariable=false
from unittest.mock import patch

import pytest
from botocore.exceptions import ClientError

from services.aws.disable_scheduler import disable_scheduler


# Mirrors a get_schedule response, read-only metadata included.
# The fields expected to be echoed into update_schedule mirror
# ../website/app/actions/aws/create-or-update-schedule.ts.
GET_SCHEDULE_RESPONSE = {
    "Name": "gitauto-repo-123-456",
    "GroupName": "default",
    "ScheduleExpression": "cron(0 15 ? * MON-FRI *)",
    "Target": {
        "Arn": "arn:aws:lambda:us-west-1:123:function:pr-agent-prod",
        "RoleArn": "arn:aws:iam::123:role/scheduler",
        "Input": '{"ownerId":123,"repoId":456,"triggerType":"schedule"}',
    },
    "FlexibleTimeWindow": {"Mode": "OFF"},
    "State": "ENABLED",
    "Description": "GitAuto scheduled trigger for repository foo/bar",
    "ActionAfterCompletion": "NONE",
    # Read-only metadata that get_schedule returns but update_schedule rejects:
    "Arn": "arn:aws:scheduler:us-west-1:123:schedule/default/gitauto-repo-123-456",
    "CreationDate": "2026-01-01T00:00:00Z",
    "LastModificationDate": "2026-04-01T00:00:00Z",
    "ResponseMetadata": {"RequestId": "abc", "HTTPStatusCode": 200},
}

# Keyword arguments update_schedule must receive for GET_SCHEDULE_RESPONSE:
# the writable fields unchanged, State flipped, read-only metadata dropped.
EXPECTED_UPDATE_KWARGS = {
    "Name": "gitauto-repo-123-456",
    "GroupName": "default",
    "ScheduleExpression": "cron(0 15 ? * MON-FRI *)",
    "Target": GET_SCHEDULE_RESPONSE["Target"],
    "FlexibleTimeWindow": {"Mode": "OFF"},
    "State": "DISABLED",
    "Description": "GitAuto scheduled trigger for repository foo/bar",
    "ActionAfterCompletion": "NONE",
}


def _get_schedule_error(code, message):
    """Build the ClientError that a failing GetSchedule call raises."""
    return ClientError({"Error": {"Code": code, "Message": message}}, "GetSchedule")


@pytest.fixture
def mock_scheduler_client():
    """Replace the scheduler client used by disable_scheduler with a mock."""
    with patch("services.aws.disable_scheduler.scheduler_client") as client:
        yield client


@pytest.fixture
def mock_logger():
    """Replace the logger used by disable_scheduler with a mock."""
    with patch("services.aws.disable_scheduler.logger") as log:
        yield log


def test_disable_scheduler_success(mock_scheduler_client, mock_logger):
    name = "gitauto-repo-123-456"
    mock_scheduler_client.get_schedule.return_value = GET_SCHEDULE_RESPONSE.copy()
    mock_scheduler_client.update_schedule.return_value = {"ScheduleArn": "arn:..."}

    outcome = disable_scheduler(name)

    assert outcome is True
    mock_scheduler_client.get_schedule.assert_called_once_with(Name=name)
    mock_scheduler_client.update_schedule.assert_called_once_with(
        **EXPECTED_UPDATE_KWARGS
    )
    mock_logger.info.assert_any_call("Disabled EventBridge Scheduler: %s", name)


def test_disable_scheduler_forwards_timezone_not_in_hardcoded_set(
    mock_scheduler_client, mock_logger
):
    """The whitelist is taken from UpdateScheduleInputTypeDef, so an optional field
    the website never sets (e.g. ScheduleExpressionTimezone) is still forwarded when
    the schedule carries it, while read-only metadata stays out."""
    timezone_field = {"ScheduleExpressionTimezone": "America/New_York"}
    mock_scheduler_client.get_schedule.return_value = (
        GET_SCHEDULE_RESPONSE | timezone_field
    )

    disable_scheduler("gitauto-repo-123-456")

    sent_kwargs = mock_scheduler_client.update_schedule.call_args.kwargs
    assert sent_kwargs == EXPECTED_UPDATE_KWARGS | timezone_field


def test_disable_scheduler_not_found(mock_scheduler_client, mock_logger):
    mock_scheduler_client.get_schedule.side_effect = _get_schedule_error(
        "ResourceNotFoundException", "not found"
    )

    outcome = disable_scheduler("missing-schedule")

    assert outcome is True
    mock_scheduler_client.update_schedule.assert_not_called()
    mock_logger.info.assert_called_once_with(
        "EventBridge Scheduler not found: %s", "missing-schedule"
    )


def test_disable_scheduler_access_denied(mock_scheduler_client, mock_logger):
    mock_scheduler_client.get_schedule.side_effect = _get_schedule_error(
        "AccessDeniedException", "denied"
    )

    outcome = disable_scheduler("restricted-schedule")

    assert outcome is False
    mock_scheduler_client.update_schedule.assert_not_called()


# --- services/slack/daily_usage_report.py (top of the module) ---
# pylint: disable=wrong-import-position,wrong-import-order
# ruff: noqa: E402
import os
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo

from dotenv import load_dotenv

# Daily report must always target PROD. Locally `.env` provides SUPABASE_URL_PRD
# alongside a dev SUPABASE_URL — override here so services.supabase.client picks up
# prod. In GitHub Actions SUPABASE_URL is already prod via secrets and
# SUPABASE_URL_PRD is absent, so the conditionals are no-ops there.
# Runs before the service imports below: when the *_PRD variables exist they
# replace the plain ones in os.environ; when they are absent nothing changes.
load_dotenv()
if os.environ.get("SUPABASE_URL_PRD"):
    os.environ["SUPABASE_URL"] = os.environ["SUPABASE_URL_PRD"]
if os.environ.get("SUPABASE_SERVICE_ROLE_KEY_PRD"):
    os.environ["SUPABASE_SERVICE_ROLE_KEY"] = os.environ["SUPABASE_SERVICE_ROLE_KEY_PRD"]

from services.aws.get_enabled_schedules import get_enabled_schedules
from services.slack.slack_notify import slack_notify

# NOTE(review): `supabase` and `resolve_repo_keys` are used below, but their
# imports fall in the lines the diff elides at this point — confirm they remain.
from utils.error.handle_exceptions import handle_exceptions
from utils.logging.logging_config import logger

PT = ZoneInfo("America/Los_Angeles")


@handle_exceptions(default_return_value=None, raise_on_error=False)
def generate_daily_usage_report():
    """Summarise the last 24 hours of usage, revenue and LLM cost, then post to Slack.

    Data read (all filtered on created_at >= now - 24h, except llm_requests which
    is filtered by the usage ids found in that window):
      * usage        -> runs, grouped by (owner_name, repo_name) and PR number
      * credits      -> revenue per owner_id from "purchase" / "auto_reload" rows
      * llm_requests -> total_cost_usd summed per usage_id

    Repos with an enabled schedule are listed even when they had no usage.

    Returns a dict with the keys "prs", "runs", "revenue_usd" and "cost_usd".
    handle_exceptions is configured with default_return_value=None.
    """
    now = datetime.now(tz=timezone.utc)
    window_start = (now - timedelta(hours=24)).isoformat()
    logger.info("Daily usage report window starts at %s", window_start)

    # Baseline list of repos: everything with an enabled EventBridge schedule
    scheduled_repos = resolve_repo_keys(get_enabled_schedules())

    # Runs recorded inside the window
    usage_rows = (
        supabase.table("usage")
        .select("id, owner_id, owner_name, repo_name, pr_number")
        .gte("created_at", window_start)
        .execute()
        .data
    ) or []
    usage_ids = [usage_row["id"] for usage_row in usage_rows]
    logger.debug("Loaded %d usage rows", len(usage_rows))

    # Revenue: money coming in during the window (purchases + auto_reload top-ups)
    revenue_rows = (
        supabase.table("credits")
        .select("owner_id, amount_usd")
        .in_("transaction_type", ["purchase", "auto_reload"])
        .gte("created_at", window_start)
        .execute()
        .data
    ) or []
    revenue_by_owner_id: dict[int, int] = {}
    for credit in revenue_rows:
        payer_id = credit["owner_id"]
        already_counted = revenue_by_owner_id.get(payer_id, 0)
        revenue_by_owner_id[payer_id] = already_counted + int(credit["amount_usd"])

    # Cost: LLM spend attached to the usage rows found above
    cost_by_usage_id: dict[int, float] = {}
    if not usage_ids:
        logger.info("No usage rows in window; skipping llm_requests fetch")
    else:
        logger.info("Fetching llm_requests for %d usage ids", len(usage_ids))
        llm_rows = (
            supabase.table("llm_requests")
            .select("usage_id, total_cost_usd")
            .in_("usage_id", usage_ids)
            .execute()
            .data
        ) or []
        for llm_row in llm_rows:
            usage_id = llm_row["usage_id"]
            if usage_id is None:
                logger.warning("llm_requests row has no usage_id; skipping")
                continue
            spent_so_far = cost_by_usage_id.get(usage_id, 0.0)
            cost_by_usage_id[usage_id] = spent_so_far + float(
                llm_row["total_cost_usd"]
            )

    # Per-repo buckets: runs and cost per PR number, plus runs/cost without a PR
    repo_usage: dict[tuple[str, str], dict] = {}
    for usage_row in usage_rows:
        owner, repo = usage_row["owner_name"], usage_row["repo_name"]
        repo_key = (owner, repo)
        if repo_key not in repo_usage:
            logger.debug("init repo_usage for %s/%s", owner, repo)
            repo_usage[repo_key] = {
                "owner_id": usage_row["owner_id"],
                "pr_runs": {},
                "pr_cost": {},
                "no_pr_count": 0,
                "no_pr_cost": 0.0,
            }
        bucket = repo_usage[repo_key]
        run_cost = cost_by_usage_id.get(usage_row["id"], 0.0)
        pr_number = usage_row["pr_number"]
        if not pr_number or pr_number <= 0:
            logger.debug("no-PR run in %s/%s", owner, repo)
            bucket["no_pr_count"] += 1
            bucket["no_pr_cost"] += run_cost
            continue
        logger.debug("PR run %s in %s/%s", pr_number, owner, repo)
        bucket["pr_runs"][pr_number] = bucket["pr_runs"].get(pr_number, 0) + 1
        bucket["pr_cost"][pr_number] = bucket["pr_cost"].get(pr_number, 0.0) + run_cost

    # Owner -> repo keys, over scheduled repos and repos that had usage
    by_owner: dict[str, list[tuple[str, str]]] = {}
    for repo_key in set(scheduled_repos.keys()) | set(repo_usage.keys()):
        by_owner.setdefault(repo_key[0], []).append(repo_key)

    # Report-wide totals
    buckets = list(repo_usage.values())
    total_unique_prs = sum(len(b["pr_runs"]) for b in buckets)
    total_runs = sum(sum(b["pr_runs"].values()) + b["no_pr_count"] for b in buckets)
    total_cost = sum(cost_by_usage_id.values())
    total_revenue = sum(revenue_by_owner_id.values())
    no_pr_cost_total = sum(
        b["no_pr_cost"] for b in buckets if b["no_pr_count"] > 0 and not b["pr_runs"]
    )

    # Header is dated in PT so the day it refers to is unambiguous
    date_str = now.astimezone(PT).strftime("%Y-%m-%d")
    if total_revenue > 0:
        margin_pct = (total_revenue - total_cost) / total_revenue * 100
        margin_str = f"{margin_pct:.1f}%"
    else:
        margin_str = "N/A"

    lines = [
        f"Daily Usage Report ({date_str} PT)",
        f"Revenue: ${total_revenue} | Cost: ${total_cost:.2f} | Margin: {margin_str}",
        f"{total_unique_prs} PRs / {total_runs} runs",
    ]
    if no_pr_cost_total > 0:
        logger.info("Adding alert: $%.2f no-PR cost", no_pr_cost_total)
        lines.append(
            f" ${no_pr_cost_total:.2f} cost from repos with usage but no PRs"
        )
    lines.append("")

    for owner_name in sorted(by_owner):
        owner_keys = sorted(by_owner[owner_name], key=lambda k: k[1])
        owner_buckets = [repo_usage[k] for k in owner_keys if k in repo_usage]

        # owner_id is only known through usage rows; owners without usage get 0
        owner_id = owner_buckets[0]["owner_id"] if owner_buckets else None
        owner_revenue = 0
        if owner_id:
            owner_revenue = revenue_by_owner_id.get(owner_id, 0)

        owner_unique_prs = sum(len(b["pr_runs"]) for b in owner_buckets)
        owner_runs = sum(
            sum(b["pr_runs"].values()) + b["no_pr_count"] for b in owner_buckets
        )
        owner_cost = sum(
            sum(b["pr_cost"].values()) + b["no_pr_cost"] for b in owner_buckets
        )
        lines.append(
            f"*{owner_name}* Revenue: ${owner_revenue} Cost: ${owner_cost:.2f} "
            f"({owner_unique_prs} PRs / {owner_runs} runs)"
        )

        for repo_key in owner_keys:
            repo_name = repo_key[1]
            usage = repo_usage.get(repo_key)

            if not usage or (not usage["pr_runs"] and usage["no_pr_count"] <= 0):
                logger.debug("fmt %s/%s (scheduled, no usage)", repo_key[0], repo_name)
                lines.append(f" {repo_name} --")
                continue

            if not usage["pr_runs"]:
                logger.debug("fmt %s/%s (no-PR only)", repo_key[0], repo_name)
                lines.append(
                    f" {repo_name} ({usage['no_pr_count']} runs, no PR) "
                    f"Cost: ${usage['no_pr_cost']:.2f}"
                )
                continue

            logger.debug("fmt %s/%s (PRs)", repo_key[0], repo_name)
            repo_cost = sum(usage["pr_cost"].values()) + usage["no_pr_cost"]
            pr_parts = [
                (
                    f"#{pr} ({usage['pr_runs'][pr]}×) ${usage['pr_cost'][pr]:.2f}"
                    if usage["pr_runs"][pr] > 1
                    else f"#{pr} ${usage['pr_cost'][pr]:.2f}"
                )
                for pr in sorted(usage["pr_runs"])
            ]
            lines.append(
                f" {repo_name} Cost: ${repo_cost:.2f} {', '.join(pr_parts)}"
            )
        lines.append("")

    message = "\n".join(lines)
    logger.info(
        "Daily usage report: %d PRs, %d runs, revenue $%d, cost $%.2f",
        total_unique_prs,
        total_runs,
        total_revenue,
        total_cost,
    )
    logger.info("Daily usage report body:\n%s", message)
    slack_notify(message)
    logger.debug("slack_notify dispatched")
    return {
        "prs": total_unique_prs,
        "runs": total_runs,
        "revenue_usd": total_revenue,
        "cost_usd": total_cost,
    }


# NOTE(review): the module's `if __name__ == "__main__":` entry point follows the
# function; its body is outside this view and must be kept as it is.