diff --git a/.deepreview b/.deepreview index e8a57f09..22e2296e 100644 --- a/.deepreview +++ b/.deepreview @@ -386,3 +386,45 @@ deepreview_config_quality: - FAIL: Issues found. List each with the .deepreview file path, rule name, which check failed (consolidation / description / overly-broad / placement), and a specific recommendation. + +job_schema_instruction_compatibility: + description: "Verify deepwork_jobs instruction files, templates, and examples are compatible with the job schema." + match: + include: + - "src/deepwork/jobs/job.schema.json" + - "src/deepwork/standard_jobs/deepwork_jobs/steps/*.md" + - "src/deepwork/standard_jobs/deepwork_jobs/templates/*" + - "src/deepwork/standard_jobs/deepwork_jobs/job.yml" + review: + strategy: matches_together + additional_context: + unchanged_matching_files: true + instructions: | + When the job schema or deepwork_jobs instruction files change, verify they + are still compatible with each other. + + Read src/deepwork/jobs/job.schema.json to understand the current schema. + Then read each instruction file, template, and example in + src/deepwork/standard_jobs/deepwork_jobs/ and check: + + 1. **Field references**: Every field name mentioned in prose instructions, + templates, or examples must exist in the schema at the correct level. + Pay special attention to root-level vs step-level fields — a field + that exists on steps may not exist at the root, and vice versa. + + 2. **Required vs optional**: If instructions say a field is required, + verify the schema agrees. If instructions say a field is optional, + verify the schema doesn't require it. + + 3. **Schema structure**: Template files and examples that show YAML + structure must match the schema's property names and nesting. + + 4. 
**Terminology consistency**: Instructions should use the same field + names as the schema (e.g., if the schema uses + "common_job_info_provided_to_all_steps_at_runtime", instructions + should not call it "description" or "job_description"). + + Output Format: + - PASS: All instruction files are compatible with the schema. + - FAIL: Incompatibilities found. List each with the file path, line + reference, the incompatible content, and what the schema actually says. diff --git a/.deepwork/.gitignore b/.deepwork/.gitignore index 7b273563..e597ebbd 100644 --- a/.deepwork/.gitignore +++ b/.deepwork/.gitignore @@ -2,6 +2,7 @@ # These files are generated during sessions and should not be committed .last_work_tree .last_head_ref +job.schema.json # Temporary files (but keep the directory via .gitkeep) tmp/* diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 14a8282b..c9cb22a9 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -21,7 +21,7 @@ We use a skip pattern so the same required checks pass in both PR and merge queu |----------|--------|----------------|----------------| | **Validate** | Runs | Runs | Runs | | **Integration Tests** | Skipped (passes) | Runs | Runs | -| **E2E Tests** | Skipped (passes) | Runs | Runs | +| **E2E Tests** | Skipped unless workflow file changed | Runs | Runs | | **CLA Check** | Runs | Skipped (passes) | Skipped (passes) | ### How It Works @@ -64,7 +64,6 @@ When a job is skipped due to an `if` condition, GitHub treats it as a successful In GitHub branch protection rules, require these checks: - `Validate / tests` -- `Claude Code Integration Test / pr-check` (for PRs) - `Claude Code Integration Test / validate-generation` (for merge queue) - `Claude Code Integration Test / claude-code-e2e` (for merge queue) - `CLA Assistant / merge-queue-pass` (for merge queue) @@ -84,10 +83,10 @@ All checks will pass in both PR and merge queue contexts (either by running or b ### claude-code-test.yml - 
**Triggers**: `pull_request` (main), `merge_group` (main), `workflow_dispatch` - **Jobs**: - - `pr-check`: Runs on PRs only, always passes (lightweight check) - `validate-generation`: Tests skill generation from fixtures (no API key needed) - `claude-code-e2e`: Full end-to-end test with Claude Code CLI (requires `ANTHROPIC_API_KEY`) -- `validate-generation` and `claude-code-e2e` skip on PRs, run in merge queue and manual dispatch +- `validate-generation` skips on PRs, runs in merge queue and manual dispatch +- `claude-code-e2e` skips on PRs unless the workflow file itself is changed (so CI fixes can be iterated in PRs) ### cla.yml - **Triggers**: `pull_request_target`, `issue_comment`, `merge_group` (main), `workflow_dispatch` diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml index 5244e8a7..26f9be1b 100644 --- a/.github/workflows/claude-code-test.yml +++ b/.github/workflows/claude-code-test.yml @@ -130,16 +130,32 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} steps: - # For PRs: just pass quickly (actual tests run in merge queue) - - name: Skip on PR - if: github.event_name == 'pull_request' - run: echo "E2E tests will run in merge queue. Passing for PR." + # Determine whether to run the full e2e test suite. + # Always runs in merge_group and workflow_dispatch. + # For PRs, only runs if the workflow file itself was changed (so we can iterate on CI fixes). 
+ - name: Determine if tests should run + id: should-run + env: + GH_TOKEN: ${{ github.token }} + run: | + if [ "${{ github.event_name }}" != "pull_request" ]; then + echo "run=true" >> $GITHUB_OUTPUT + else + FILES=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files --jq '.[].filename' 2>/dev/null || echo "") + if echo "$FILES" | grep -q '^\.github/workflows/claude-code-test\.yml$'; then + echo "run=true" >> $GITHUB_OUTPUT + echo "Workflow file changed in PR - running e2e tests" + else + echo "run=false" >> $GITHUB_OUTPUT + echo "E2E tests will run in merge queue. Passing for PR." + fi + fi - uses: actions/checkout@v4 - if: github.event_name != 'pull_request' + if: steps.should-run.outputs.run == 'true' - name: Check for API key - if: github.event_name != 'pull_request' + if: steps.should-run.outputs.run == 'true' id: check-key run: | if [ -z "$ANTHROPIC_API_KEY" ]; then @@ -221,7 +237,8 @@ jobs: 'allow': [ 'Bash(*)', 'Read(./**)', 'Edit(./**)', 'Write(./**)', 'Skill(*)', 'mcp__deepwork__get_workflows', 'mcp__deepwork__start_workflow', - 'mcp__deepwork__finished_step', 'mcp__deepwork__abort_workflow' + 'mcp__deepwork__finished_step', 'mcp__deepwork__abort_workflow', + 'mcp__deepwork__go_to_step' ] } } @@ -247,9 +264,10 @@ jobs: echo "=== Running /deepwork to create fruits job ===" mkdir fruits - # Use --debug to capture detailed logs for diagnosing failures. - # The debug log is dumped in the failure handler below. - claude --print --debug --model claude-sonnet-4-5 <<'PROMPT_EOF' + # Use --debug and --output-format stream-json for diagnosing failures. + # stream-json shows every tool call; output is captured to a file for the failure handler. 
+ set -o pipefail + claude --print --verbose --output-format stream-json --max-turns 20 --debug --model claude-sonnet-4-6 --dangerously-skip-permissions <<'PROMPT_EOF' | tee ../claude-create-job.jsonl /deepwork I want to create a simple job called "fruits" for identifying and classifying fruits. Here are the EXACT specifications. @@ -268,9 +286,12 @@ jobs: **CRITICAL**: must put the classified fruit list in `./fruits/classified_fruits.md`. **Key Instructions:** - - Do not ask questions - just make the job + - NEVER use AskUserQuestion — you already have all the information you need above. + - You MUST complete all tool calls needed to create the files. Do not stop early. + - Do not ask questions - just make the job. - Rules are explicitly not desired. Tell the review agents that. - Do not give long commentary of what you did - just make the job with no commentary. + - NEVER start the "repair" or "learn" workflows. Only use "new_job". If a quality review fails, fix the issues in the files and resubmit — do not switch workflows. - IMPORTANT: Once the job.yml and step instruction files have been created (i.e. after the "define" and "implement" steps are done), STOP. Do NOT continue into the "test" or "iterate" steps. Abort the workflow at that point. We only need the job definition files created, not the full workflow run. 
PROMPT_EOF @@ -309,6 +330,22 @@ jobs: if: failure() && steps.check-key.outputs.has_key == 'true' working-directory: test_project run: | + echo "=== Claude stream-json output (create job) ===" + if [ -f "../claude-create-job.jsonl" ]; then + echo "--- Last 100 lines ---" + tail -100 ../claude-create-job.jsonl + else + echo "No stream-json output captured for create job step" + fi + echo "" + echo "=== Claude stream-json output (run workflow) ===" + if [ -f "../claude-run-workflow.jsonl" ]; then + echo "--- Last 100 lines ---" + tail -100 ../claude-run-workflow.jsonl + else + echo "No stream-json output captured for run workflow step" + fi + echo "" echo "=== Claude debug log ===" # Claude --debug writes to ~/.claude/debug.log if [ -f "$HOME/.claude/debug.log" ]; then @@ -340,8 +377,12 @@ jobs: run: | echo "=== Running fruits workflow with test input via /deepwork ===" - claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF' + set -o pipefail + claude --print --verbose --output-format stream-json --max-turns 20 --debug --model claude-sonnet-4-6 --dangerously-skip-permissions <<'PROMPT_EOF' | tee ../claude-run-workflow.jsonl /deepwork Run the fruits full workflow. Process the list to the file and don't give any extra commentary or text output. + NEVER use AskUserQuestion — you already have all the information you need. + You MUST complete all tool calls needed. Do not stop early. + CRITICAL: All output files MUST be written relative to the current working directory (the project root), NOT inside .deepwork/jobs/. For example, write to ./fruits/identified_fruits.md, NOT .deepwork/jobs/fruits/identified_fruits.md. 
raw_items: apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle PROMPT_EOF @@ -405,4 +446,6 @@ jobs: test_project/.claude/skills/deepwork/ test_project/fruits/identified_fruits.md test_project/fruits/classified_fruits.md + claude-create-job.jsonl + claude-run-workflow.jsonl retention-days: 7 diff --git a/specs/deepwork/jobs/JOBS-REQ-001-mcp-workflow-tools.md b/specs/deepwork/jobs/JOBS-REQ-001-mcp-workflow-tools.md index fe40700e..a7004df9 100644 --- a/specs/deepwork/jobs/JOBS-REQ-001-mcp-workflow-tools.md +++ b/specs/deepwork/jobs/JOBS-REQ-001-mcp-workflow-tools.md @@ -20,6 +20,7 @@ The DeepWork MCP server exposes five workflow tools to AI agents via the Model C 10. The server MUST be named `"deepwork"`. 11. The server MUST include instructions text describing the workflow lifecycle (Discover, Start, Execute, Checkpoint, Iterate, Continue, Complete, Going Back). 12. Every tool call MUST be logged with the tool name and current stack state. +13. On startup, the server MUST copy `job.schema.json` from its package-bundled location to `.deepwork/job.schema.json` under the project root, overwriting any existing file at that path. If the copy fails (e.g., permission error), the server MUST log a warning and continue without error. ### JOBS-REQ-001.2: get_workflows Tool diff --git a/src/deepwork/jobs/mcp/server.py b/src/deepwork/jobs/mcp/server.py index a36fef75..7f8371ec 100644 --- a/src/deepwork/jobs/mcp/server.py +++ b/src/deepwork/jobs/mcp/server.py @@ -14,6 +14,7 @@ from __future__ import annotations import logging +import shutil from pathlib import Path from typing import Any @@ -34,6 +35,26 @@ logger = logging.getLogger("deepwork.jobs.mcp") +def _ensure_schema_available(project_root: Path) -> None: + """Copy job.schema.json to .deepwork/ so agents have a stable reference path. + + The schema file is bundled with the DeepWork package at an install-dependent + location. 
This copies it to .deepwork/job.schema.json on every server start + so that agents and step instructions can always reference it at a known path. + """ + from deepwork.jobs.schema import get_schema_path + + schema_source = get_schema_path() + target_dir = project_root / ".deepwork" + target = target_dir / "job.schema.json" + + try: + target_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(schema_source, target) + except OSError: + logger.warning("Could not copy schema to %s", target) + + def create_server( project_root: Path | str, enable_quality_gate: bool = True, @@ -60,6 +81,9 @@ def create_server( """ project_path = Path(project_root).resolve() + # Copy the job schema to a stable location so agents can always reference it + _ensure_schema_available(project_path) + # Initialize components state_manager = StateManager(project_path) diff --git a/src/deepwork/jobs/mcp/tools.py b/src/deepwork/jobs/mcp/tools.py index 76557cc2..9cde1b5d 100644 --- a/src/deepwork/jobs/mcp/tools.py +++ b/src/deepwork/jobs/mcp/tools.py @@ -347,15 +347,16 @@ def get_workflows(self) -> GetWorkflowsResponse: """ jobs, load_errors = self._load_all_jobs() job_infos = [self._job_to_info(job) for job in jobs] - repair_hint = ( - "\nThis project likely needs `/deepwork:repair` run to correct the issue" - " unless the offending file(s) were changed this session and the agent can fix it directly." - ) error_infos = [ JobLoadErrorInfo( job_name=e.job_name, job_dir=e.job_dir, - error=e.error + repair_hint, + error=( + f"{e.error}\n" + f"The invalid file is {e.job_dir}/job.yml. " + f"If you edited that file this session, fix it directly. " + f"If you did not edit it, the project may need `/deepwork repair` to migrate legacy formats." 
+ ), ) for e in load_errors ] diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index d35cdbf7..656bd186 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -16,6 +16,24 @@ common_job_info_provided_to_all_steps_at_runtime: | confusion or inefficiencies, and improves job instructions. It also captures bespoke learnings specific to the current run into AGENTS.md files in the working folder. + ## Job Schema (CRITICAL) + + Before creating or editing any `job.yml` file, you MUST read the JSON schema at + `.deepwork/job.schema.json`. This schema is the authoritative source of truth for + all valid fields, types, and structures. The schema uses `additionalProperties: false` + at every level, so any extra or misspelled fields will cause validation failures. + + Key schema rules that agents commonly get wrong: + - **Inputs use `oneOf`** — there are exactly two input formats, with no extra fields allowed: + - User parameter: `{name: str, description: str}` — ONLY these two fields + - File from prior step: `{file: str, from_step: str}` — ONLY these two fields + - **No `type` field on inputs** — do NOT add `type: "user_provided"` or `type: "file"` to inputs + - **No `path` field on inputs** — file paths are resolved by the framework, not specified in inputs + - **Output keys** are the output name; values have `{type: "file"|"files", description: str, required: bool}` + - **No `description` field at root level** — use `common_job_info_provided_to_all_steps_at_runtime` instead + + Always read the schema file and validate your job.yml structure against it. 
+ workflows: - name: new_job summary: "Create a new DeepWork job from scratch through definition, implementation, testing, and iteration" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 0ff8a969..cb483667 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -144,6 +144,8 @@ The `review_draft` step's instructions should tell the agent to: **When to recognize this pattern:** Look for language like "keep refining until X", "iterate until satisfied", "go back and redo Y if Z", or any cycle where later steps may invalidate earlier work. If the iteration involves just one step retrying its own output, rely on quality reviews instead. +**After defining all steps**, check whether any step matches the Parallel Sub-Workflow or Iterative Loop patterns above. If so, discuss the pattern with the user and restructure the workflow accordingly before proceeding to validation. + ### Step 3: Validate the Workflow After gathering information about all steps: @@ -161,18 +163,20 @@ After gathering information about all steps: 3. **Confirm details** - Job name (lowercase, underscores, descriptive) - Job summary (one clear sentence, max 200 chars) - - Job description (detailed multi-line explanation) + - Common job info provided to all steps at runtime (detailed multi-line context shared across all steps) - Version number (start with 1.0.0) ### Step 4: Define Quality Reviews +**Constraint: Every step producing a written final deliverable MUST have at least one review defined.** + For each step, define **reviews** that evaluate the step's outputs. Reviews run automatically when a step completes and provide quality validation loops. For intermediate outputs between steps, reviews let you make sure you don't go too far down the wrong path. Add reviews that confirm things that could cause problems later. 
For example, in a report creation process, you might have an intermediate step that performs a number of queries on the data and records the results so that later report-writing steps can synthesize that information into a coherent narrative. In this case, you would want to add a review that checks that the queries SQL matches up with the description of the queries in the job description. -For final outputs, reviews let you make sure the output meets the user's expectations. For example, with a data-centric report job, you might have one review on the final output for consistency with style guidelines and tone and such, and a totally separate review on the data-backing to make sure the claims in the report are supported by the data from earlier steps and all have citations. +For final outputs, reviews let you make sure the output meets the user's expectations. For example, with a data-centric report job, you might have one review on the final output for consistency with style guidelines and tone and such, and a totally separate review on the data-backing to make sure the claims in the report are supported by the data from earlier steps and all have citations. -**Any jobs with written final output must always have reviews**. Some suggested ones are: +Some suggested review types for final outputs: - Ensure claims have citations and the citations are not hallucinated - Ensure the output follows the style guidelines and tone - Ensure the output is well-organized and easy to read @@ -247,9 +251,11 @@ reviews: [] ### Step 5: Create the Job Directory and Specification -Only after you have complete understanding, create the job directory and `job.yml` file: +Only after you have complete understanding, create the job directory and `job.yml` file. + +**Note**: `[job_dir]` refers to the `job_dir` path returned in the workflow response when this workflow was started. It points to the directory containing this job's definition, scripts, and templates. 
-**First, create the directory structure** using the `make_new_job.sh` script located in this job's directory (the `job_dir` path from the workflow response): +**First, create the directory structure** using the `make_new_job.sh` script: ```bash [job_dir]/make_new_job.sh [job_name] @@ -331,7 +337,7 @@ Claude: Let me summarize the workflow I've designed based on our conversation: **Job: competitive_research** Summary: Systematic competitive analysis workflow for product positioning -Description: +Common job info (shared context for all steps): A comprehensive workflow for analyzing competitors in your market segment. This job helps product teams understand the competitive landscape by systematically identifying competitors, researching their offerings, creating comparison matrices, and developing strategic positioning recommendations. The workflow produces: @@ -384,7 +390,7 @@ Implement the job to generate step instruction files. 1. **Focus on specification only** - Don't create instruction files yet 2. **Ask structured questions** - Never skip the discovery phase; use the AskUserQuestion tool -3. **Rich context in description** - This helps with future refinement +3. **Rich context in common_job_info_provided_to_all_steps_at_runtime** - Include the job's purpose, what the workflow produces, the intended audience, and domain-specific context that steps will need (see the example dialog for a reference) 4. **Validate understanding** - Summarize and confirm before creating 5. **Use examples** - Help users understand what good specifications look like 6. 
**Understand file organization** - Always ask structured questions about where outputs should be saved and if subdirectories are needed @@ -395,7 +401,7 @@ Before creating the job.yml, ensure: - Job name: lowercase, underscores, no spaces - Version: semantic versioning (1.0.0) - Summary: concise, under 200 characters -- Description: detailed, provides context +- Common job info: detailed, provides shared context for all steps - Step IDs: unique, descriptive, lowercase with underscores - Dependencies: must reference existing step IDs - File inputs: `from_step` must be in dependencies diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 516b294a..e314ebe5 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -8,6 +8,8 @@ Generate step instruction files for each step based on the `job.yml` specificati Read the `job.yml` specification file created by the define step and generate comprehensive instruction files for each step. The define step has already created the job directory structure. +**Note**: Throughout this document, `<job_dir>` refers to the `job_dir` path returned in the workflow response when this workflow was started. It points to the directory containing this job's definition and templates. + ### Step 1: Read and Validate the Specification 1. **Locate the job.yml file** @@ -15,13 +17,13 @@ Read the `job.yml` specification file created by the define step and generate co - Parse the YAML content 2. **Validate the specification** - - Ensure it follows the schema (name, version, summary, description, steps) + - Ensure it follows the schema (name, version, summary, common_job_info_provided_to_all_steps_at_runtime, steps) - Check that all dependencies reference existing steps - Verify no circular dependencies - Confirm file inputs match dependencies 3. 
**Extract key information** - - Job name, version, summary, description + - Job name, version, summary, common_job_info_provided_to_all_steps_at_runtime - List of all steps with their details - Understand the workflow structure @@ -42,7 +44,7 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw **Guidelines for generating instructions:** -1. **Use the job description** - The detailed description from job.yml provides crucial context +1. **Use the common job info** - The `common_job_info_provided_to_all_steps_at_runtime` from job.yml provides crucial context 2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose 3. **Provide output format examples** - Include a markdown code block in an "Output Format" section showing the expected file structure. A template with `[bracket placeholders]` is acceptable. For complex outputs, also include a concrete filled-in example showing realistic data — this is especially valuable for the first step in a workflow where there's no prior output to reference. 4. **Explain the "why"** - Help the user understand the step's role in the workflow @@ -51,7 +53,7 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 7. **Ask structured questions (when applicable)** - When a step has user-provided inputs (name/description inputs in job.yml), the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool. Steps that only have file inputs from prior steps do NOT need this phrase — they process data without user interaction. 8. 
**Handle edge cases** - If inputs might be missing, ambiguous, or incomplete, tell the agent to ask structured questions to clarify how to proceed rather than guessing -### Handling Reviews +#### Handling Reviews If a step in the job.yml has `reviews` defined, the generated instruction file should: @@ -82,7 +84,7 @@ If a step in the job.yml has `reviews` defined, the generated instruction file s This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing. -### Writing Loop Instructions (go_to_step) +#### Writing Loop Instructions (go_to_step) If a step in the job.yml is designed as a decision point that may loop back to an earlier step (see the "Iterative Loop Pattern" in the define step), the instruction file for that step must include clear guidance on when and how to use `go_to_step`. @@ -117,7 +119,7 @@ regardless and document any remaining issues in the output. **Important**: Only add `go_to_step` instructions to steps that are explicitly designed as loop decision points in the workflow. Most steps should NOT reference `go_to_step`. -### Using Supplementary Reference Files +#### Using Supplementary Reference Files Step instructions can include additional `.md` files in the `steps/` directory for detailed examples, templates, or reference material. Reference them using the full path from the project root. @@ -135,21 +137,15 @@ For a complete worked example showing a job.yml and corresponding step instructi - **Job specification**: `<job_dir>/templates/job.yml.example` - **Step instruction**: `<job_dir>/templates/step_instruction.md.example` -## Important Guidelines - -1. **Read the spec carefully** - Understand the job's intent from the description -2. **Generate complete instructions** - Don't create placeholder or stub files -3. **Maintain consistency** - Use the same structure for all step instruction files -4. **Provide examples** - Show what good output looks like -5. 
**Use context** - The job description provides valuable context for each step -6. **Be specific** - Tailor instructions to the specific step, not generic advice - ## Completion Checklist Before marking this step complete, ensure: - [ ] job.yml validated and in job directory - [ ] All step instruction files created -- [ ] Each instruction file is complete and actionable +- [ ] Each instruction file uses the same structure (consistent with the template) +- [ ] Each instruction file has an Output Format section with examples +- [ ] Quality criteria in instruction files align with reviews defined in job.yml +- [ ] Steps with user-provided inputs include guidance to ask structured questions ## Note: Workflow Availability diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md index 6df0c1a1..64d9a48f 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -210,7 +210,6 @@ I identified the following issues from the test run: **`.deepwork/jobs/monthly_report/job.yml`:** - Version bumped to 1.0.1 -- Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback" Done! The workflow has been updated. Changes are immediately available through the MCP server. diff --git a/tests/unit/jobs/mcp/test_server.py b/tests/unit/jobs/mcp/test_server.py new file mode 100644 index 00000000..d029a8d7 --- /dev/null +++ b/tests/unit/jobs/mcp/test_server.py @@ -0,0 +1,84 @@ +"""Tests for MCP server creation and startup behavior.""" + +from pathlib import Path +from unittest.mock import patch + +from deepwork.jobs.mcp.server import _ensure_schema_available, create_server + + +class TestEnsureSchemaAvailable: + """Tests for JOBS-REQ-001.1.13: schema copy on startup.""" + + # THIS TEST VALIDATES A HARD REQUIREMENT (JOBS-REQ-001.1.13). 
+ # YOU MUST NOT MODIFY THIS TEST UNLESS THE REQUIREMENT CHANGES + def test_copies_schema_to_deepwork_dir(self, tmp_path: Path) -> None: + """Schema file is copied to .deepwork/job.schema.json.""" + _ensure_schema_available(tmp_path) + + target = tmp_path / ".deepwork" / "job.schema.json" + assert target.exists(), "Schema should be copied to .deepwork/job.schema.json" + assert target.stat().st_size > 0, "Copied schema should not be empty" + + # THIS TEST VALIDATES A HARD REQUIREMENT (JOBS-REQ-001.1.13). + # YOU MUST NOT MODIFY THIS TEST UNLESS THE REQUIREMENT CHANGES + def test_overwrites_existing_stale_schema(self, tmp_path: Path) -> None: + """An existing (stale) schema file at the target path is overwritten.""" + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + target = deepwork_dir / "job.schema.json" + target.write_text('{"stale": true}') + + _ensure_schema_available(tmp_path) + + content = target.read_text() + assert '"stale"' not in content, "Stale schema should be overwritten" + assert len(content) > 20, "Overwritten schema should contain real content" + + # THIS TEST VALIDATES A HARD REQUIREMENT (JOBS-REQ-001.1.13). + # YOU MUST NOT MODIFY THIS TEST UNLESS THE REQUIREMENT CHANGES + def test_creates_deepwork_dir_if_missing(self, tmp_path: Path) -> None: + """The .deepwork/ directory is created if it does not exist.""" + project = tmp_path / "new_project" + project.mkdir() + + _ensure_schema_available(project) + + assert (project / ".deepwork" / "job.schema.json").exists() + + # THIS TEST VALIDATES A HARD REQUIREMENT (JOBS-REQ-001.1.13). 
+ # YOU MUST NOT MODIFY THIS TEST UNLESS THE REQUIREMENT CHANGES + def test_logs_warning_on_failure(self, tmp_path: Path) -> None: + """A warning is logged if the copy fails, but no exception is raised.""" + with patch( + "deepwork.jobs.mcp.server.shutil.copy2", side_effect=OSError("permission denied") + ): + # Should not raise + _ensure_schema_available(tmp_path) + + def test_schema_content_matches_source(self, tmp_path: Path) -> None: + """Copied schema matches the bundled source file.""" + from deepwork.jobs.schema import get_schema_path + + _ensure_schema_available(tmp_path) + + source_content = get_schema_path().read_text() + target_content = (tmp_path / ".deepwork" / "job.schema.json").read_text() + assert source_content == target_content + + +class TestCreateServerSchemaSetup: + """Test that create_server copies the schema on startup.""" + + # THIS TEST VALIDATES A HARD REQUIREMENT (JOBS-REQ-001.1.13). + # YOU MUST NOT MODIFY THIS TEST UNLESS THE REQUIREMENT CHANGES + def test_create_server_copies_schema(self, tmp_path: Path) -> None: + """create_server copies job.schema.json to .deepwork/ on startup.""" + create_server( + project_root=tmp_path, + enable_quality_gate=False, + ) + + target = tmp_path / ".deepwork" / "job.schema.json" + assert target.exists(), ( + "create_server must copy job.schema.json to .deepwork/job.schema.json on startup" + )