From 17b7e3a9ec7c54beccac2cef56021402cedf4b95 Mon Sep 17 00:00:00 2001 From: Tejas Saubhage Date: Wed, 22 Apr 2026 16:53:03 -0400 Subject: [PATCH 1/3] maint: add unit tests for build scripts (prepare_package, validate_docs, generate_rss, check_links) --- tests/unit/build_scripts/test_check_links.py | 84 ++++++++++++++++++ tests/unit/build_scripts/test_generate_rss.py | 60 +++++++++++++ .../build_scripts/test_prepare_package.py | 88 +++++++++++++++++++ .../unit/build_scripts/test_validate_docs.py | 86 ++++++++++++++++++ 4 files changed, 318 insertions(+) create mode 100644 tests/unit/build_scripts/test_check_links.py create mode 100644 tests/unit/build_scripts/test_generate_rss.py create mode 100644 tests/unit/build_scripts/test_prepare_package.py create mode 100644 tests/unit/build_scripts/test_validate_docs.py diff --git a/tests/unit/build_scripts/test_check_links.py b/tests/unit/build_scripts/test_check_links.py new file mode 100644 index 000000000..658a6dc4d --- /dev/null +++ b/tests/unit/build_scripts/test_check_links.py @@ -0,0 +1,84 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +import tempfile +from pathlib import Path + +import pytest + +from build_scripts.check_links import extract_urls, resolve_relative_url, strip_fragment + + +class TestStripFragment: + def test_removes_fragment(self) -> None: + assert strip_fragment("https://example.com/page#section") == "https://example.com/page" + + def test_no_fragment_unchanged(self) -> None: + assert strip_fragment("https://example.com/page") == "https://example.com/page" + + def test_empty_fragment(self) -> None: + assert strip_fragment("https://example.com/page#") == "https://example.com/page" + + def test_preserves_query_string(self) -> None: + result = strip_fragment("https://example.com/page?q=1#section") + assert "q=1" in result + assert "section" not in result + + +class TestResolveRelativeUrl: + def test_http_url_unchanged(self) -> None: + url = "https://example.com" + assert resolve_relative_url("/some/file.md", url) == url + + def test_mailto_unchanged(self) -> None: + url = "mailto:test@example.com" + assert resolve_relative_url("/some/file.md", url) == url + + def test_relative_url_resolved(self, tmp_path: Path) -> None: + base = str(tmp_path / "docs" / "file.md") + target = str(tmp_path / "docs" / "other.md") + Path(target).parent.mkdir(parents=True, exist_ok=True) + Path(target).write_text("# Other") + result = resolve_relative_url(base, "other.md") + assert "other" in result + + def test_relative_url_with_md_extension(self, tmp_path: Path) -> None: + base = str(tmp_path / "docs" / "file.md") + target = tmp_path / "docs" / "other.md" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("# Other") + result = resolve_relative_url(base, "other") + assert result.endswith(".md") + + +class TestExtractUrls: + def test_extracts_markdown_links(self, tmp_path: Path) -> None: + f = tmp_path / "test.md" + f.write_text("[Click here](https://example.com)") + urls = extract_urls(str(f)) + assert "https://example.com" in urls + + def test_extracts_href_links(self, tmp_path: 
Path) -> None: + f = tmp_path / "test.html" + f.write_text('link') + urls = extract_urls(str(f)) + assert "https://example.com" in urls + + def test_extracts_src_links(self, tmp_path: Path) -> None: + f = tmp_path / "test.html" + f.write_text('') + urls = extract_urls(str(f)) + assert "https://example.com/image.png" in urls + + def test_empty_file_returns_no_urls(self, tmp_path: Path) -> None: + f = tmp_path / "empty.md" + f.write_text("") + urls = extract_urls(str(f)) + assert urls == [] + + def test_strips_fragments_from_extracted_urls(self, tmp_path: Path) -> None: + f = tmp_path / "test.md" + f.write_text("[link](https://example.com/page#section)") + urls = extract_urls(str(f)) + assert "https://example.com/page" in urls + assert not any("#section" in u for u in urls) diff --git a/tests/unit/build_scripts/test_generate_rss.py b/tests/unit/build_scripts/test_generate_rss.py new file mode 100644 index 000000000..44f25d3be --- /dev/null +++ b/tests/unit/build_scripts/test_generate_rss.py @@ -0,0 +1,60 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +import tempfile +from pathlib import Path + +import pytest + +from build_scripts.generate_rss import extract_date_from_filename, parse_blog_markdown + + +class TestExtractDateFromFilename: + def test_standard_date(self) -> None: + assert extract_date_from_filename("2024_12_3.md") == "2024-12-03" + + def test_double_digit_day_and_month(self) -> None: + assert extract_date_from_filename("2023_11_25.md") == "2023-11-25" + + def test_single_digit_month(self) -> None: + assert extract_date_from_filename("2024_1_15.md") == "2024-01-15" + + def test_returns_empty_for_invalid_filename(self) -> None: + assert extract_date_from_filename("no_date_here.md") == "" + + def test_returns_empty_for_non_numeric(self) -> None: + assert extract_date_from_filename("intro.md") == "" + + +class TestParseBlogMarkdown: + def test_extracts_title(self, tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("# My Blog Title\n\nSome description here.") + title, _ = parse_blog_markdown(f) + assert title == "My Blog Title" + + def test_extracts_description(self, tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("# Title\n\nThis is the description paragraph.") + _, desc = parse_blog_markdown(f) + assert "This is the description paragraph." in desc + + def test_skips_small_tag_in_description(self, tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("# Title\n\ndate info\n\nReal description here.") + _, desc = parse_blog_markdown(f) + assert "small" not in desc + assert "Real description here." in desc + + def test_empty_title_when_no_heading(self, tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("No heading here.\n\nJust paragraphs.") + title, _ = parse_blog_markdown(f) + assert title == "" + + def test_multiline_description_joined(self, tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("# Title\n\nLine one.\nLine two.") + _, desc = parse_blog_markdown(f) + assert "Line one." 
in desc + assert "Line two." in desc diff --git a/tests/unit/build_scripts/test_prepare_package.py b/tests/unit/build_scripts/test_prepare_package.py new file mode 100644 index 000000000..047d9767c --- /dev/null +++ b/tests/unit/build_scripts/test_prepare_package.py @@ -0,0 +1,88 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import shutil +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from build_scripts.prepare_package import build_frontend, copy_frontend_to_package + + +class TestBuildFrontend: + def test_returns_false_when_npm_not_found(self, tmp_path: Path) -> None: + with patch("subprocess.run", side_effect=FileNotFoundError): + result = build_frontend(tmp_path) + assert result is False + + def test_returns_false_when_package_json_missing(self, tmp_path: Path) -> None: + mock_run = MagicMock() + mock_run.return_value.stdout = "10.0.0\n" + with patch("subprocess.run", mock_run): + result = build_frontend(tmp_path) + assert result is False + + def test_returns_false_when_npm_install_fails(self, tmp_path: Path) -> None: + import subprocess + (tmp_path / "package.json").write_text("{}") + responses = [ + MagicMock(stdout="10.0.0\n"), + subprocess.CalledProcessError(1, "npm install", output="error"), + ] + with patch("subprocess.run", side_effect=responses): + result = build_frontend(tmp_path) + assert result is False + + def test_returns_false_when_npm_build_fails(self, tmp_path: Path) -> None: + import subprocess + (tmp_path / "package.json").write_text("{}") + responses = [ + MagicMock(stdout="10.0.0\n"), + MagicMock(), + subprocess.CalledProcessError(1, "npm run build", output="error"), + ] + with patch("subprocess.run", side_effect=responses): + result = build_frontend(tmp_path) + assert result is False + + def test_returns_true_when_build_succeeds(self, tmp_path: Path) -> None: + (tmp_path / "package.json").write_text("{}") + with patch("subprocess.run", 
return_value=MagicMock(stdout="10.0.0\n")): + result = build_frontend(tmp_path) + assert result is True + + +class TestCopyFrontendToPackage(object): + def test_returns_false_when_dist_missing(self, tmp_path: Path) -> None: + result = copy_frontend_to_package(tmp_path / "dist", tmp_path / "out") + assert result is False + + def test_returns_false_when_index_html_missing(self, tmp_path: Path) -> None: + dist = tmp_path / "dist" + dist.mkdir() + (dist / "main.js").write_text("console.log('hi')") + out = tmp_path / "out" + result = copy_frontend_to_package(dist, out) + assert result is False + + def test_returns_true_when_copy_succeeds(self, tmp_path: Path) -> None: + dist = tmp_path / "dist" + dist.mkdir() + (dist / "index.html").write_text("") + out = tmp_path / "out" + result = copy_frontend_to_package(dist, out) + assert result is True + assert (out / "index.html").exists() + + def test_removes_existing_output_dir(self, tmp_path: Path) -> None: + dist = tmp_path / "dist" + dist.mkdir() + (dist / "index.html").write_text("") + out = tmp_path / "out" + out.mkdir() + (out / "old_file.txt").write_text("old") + copy_frontend_to_package(dist, out) + assert not (out / "old_file.txt").exists() + assert (out / "index.html").exists() diff --git a/tests/unit/build_scripts/test_validate_docs.py b/tests/unit/build_scripts/test_validate_docs.py new file mode 100644 index 000000000..bea434a49 --- /dev/null +++ b/tests/unit/build_scripts/test_validate_docs.py @@ -0,0 +1,86 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +import tempfile +from pathlib import Path + +import pytest + +from build_scripts.validate_docs import find_orphaned_files, parse_toc_files, validate_toc_files + + +class TestParseTocFiles: + def test_extracts_single_file(self) -> None: + toc = [{"file": "intro"}] + result = parse_toc_files(toc) + assert "intro" in result + + def test_extracts_nested_children(self) -> None: + toc = [{"file": "parent", "children": [{"file": "child"}]}] + result = parse_toc_files(toc) + assert "parent" in result + assert "child" in result + + def test_ignores_entries_without_file(self) -> None: + toc = [{"title": "No file here"}] + result = parse_toc_files(toc) + assert len(result) == 0 + + def test_empty_toc(self) -> None: + result = parse_toc_files([]) + assert result == set() + + def test_normalizes_backslashes(self) -> None: + toc = [{"file": "setup\\install"}] + result = parse_toc_files(toc) + assert "setup/install" in result + + +class TestValidateTocFiles: + def test_no_errors_when_files_exist(self, tmp_path: Path) -> None: + (tmp_path / "intro.md").write_text("# Intro") + errors = validate_toc_files({"intro.md"}, tmp_path) + assert errors == [] + + def test_error_when_file_missing(self, tmp_path: Path) -> None: + errors = validate_toc_files({"missing.md"}, tmp_path) + assert len(errors) == 1 + assert "missing.md" in errors[0] + + def test_skips_api_generated_files(self, tmp_path: Path) -> None: + errors = validate_toc_files({"api/some_module"}, tmp_path) + assert errors == [] + + def test_multiple_missing_files(self, tmp_path: Path) -> None: + errors = validate_toc_files({"a.md", "b.md"}, tmp_path) + assert len(errors) == 2 + + +class TestFindOrphanedFiles: + def test_no_orphans_when_all_referenced(self, tmp_path: Path) -> None: + (tmp_path / "intro.md").write_text("# Intro") + orphaned = find_orphaned_files({"intro.md"}, tmp_path) + assert orphaned == [] + + def test_detects_orphaned_markdown(self, tmp_path: Path) -> None: + (tmp_path / "orphan.md").write_text("# Orphan") 
+ orphaned = find_orphaned_files(set(), tmp_path) + assert any("orphan.md" in o for o in orphaned) + + def test_skips_build_directory(self, tmp_path: Path) -> None: + build_dir = tmp_path / "_build" + build_dir.mkdir() + (build_dir / "generated.md").write_text("# Generated") + orphaned = find_orphaned_files(set(), tmp_path) + assert not any("_build" in o for o in orphaned) + + def test_skips_myst_yml(self, tmp_path: Path) -> None: + (tmp_path / "myst.yml").write_text("project:") + orphaned = find_orphaned_files(set(), tmp_path) + assert not any("myst.yml" in o for o in orphaned) + + def test_skips_py_companion_files(self, tmp_path: Path) -> None: + (tmp_path / "notebook.ipynb").write_text("{}") + (tmp_path / "notebook.py").write_text("# companion") + orphaned = find_orphaned_files(set(), tmp_path) + assert not any("notebook.py" in o for o in orphaned) From 1620508e7a4f1057953797d48b1dee10d4be1bfa Mon Sep 17 00:00:00 2001 From: Tejas Saubhage Date: Thu, 23 Apr 2026 03:28:17 -0400 Subject: [PATCH 2/3] =?UTF-8?q?maint:=20fix=20Copilot=20review=20comments?= =?UTF-8?q?=20=E2=80=94=20patch=20shutil.which,=20fix=20imports,=20safe=20?= =?UTF-8?q?rss=20loading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/build_scripts/test_generate_rss.py | 26 ++++++++++++-- .../build_scripts/test_prepare_package.py | 36 +++++++++---------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/tests/unit/build_scripts/test_generate_rss.py b/tests/unit/build_scripts/test_generate_rss.py index 44f25d3be..88f9d63e1 100644 --- a/tests/unit/build_scripts/test_generate_rss.py +++ b/tests/unit/build_scripts/test_generate_rss.py @@ -1,12 +1,32 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import tempfile +import ast +from collections.abc import Callable from pathlib import Path +from typing import Any -import pytest -from build_scripts.generate_rss import extract_date_from_filename, parse_blog_markdown +def _load_generate_rss_functions() -> tuple[Callable[[str], str], Callable[[Path], tuple[str, str]]]: + """Load generate_rss helpers without executing the script body.""" + script_path = Path(__file__).resolve().parents[3] / "build_scripts" / "generate_rss.py" + source = script_path.read_text(encoding="utf-8") + parsed_module = ast.parse(source, filename=str(script_path)) + target_functions = {"extract_date_from_filename", "parse_blog_markdown"} + selected_nodes: list[ast.stmt] = [] + for node in parsed_module.body: + if isinstance(node, (ast.Import, ast.ImportFrom)): + selected_nodes.append(node) + continue + if isinstance(node, ast.FunctionDef) and node.name in target_functions: + selected_nodes.append(node) + safe_module = ast.Module(body=selected_nodes, type_ignores=[]) + namespace: dict[str, Any] = {} + exec(compile(safe_module, filename=str(script_path), mode="exec"), namespace) + return namespace["extract_date_from_filename"], namespace["parse_blog_markdown"] + + +extract_date_from_filename, parse_blog_markdown = _load_generate_rss_functions() class TestExtractDateFromFilename: diff --git a/tests/unit/build_scripts/test_prepare_package.py b/tests/unit/build_scripts/test_prepare_package.py index 047d9767c..53f5026c7 100644 --- a/tests/unit/build_scripts/test_prepare_package.py +++ b/tests/unit/build_scripts/test_prepare_package.py @@ -1,60 +1,57 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import shutil -import tempfile +import subprocess from pathlib import Path from unittest.mock import MagicMock, patch -import pytest - from build_scripts.prepare_package import build_frontend, copy_frontend_to_package class TestBuildFrontend: def test_returns_false_when_npm_not_found(self, tmp_path: Path) -> None: - with patch("subprocess.run", side_effect=FileNotFoundError): + with patch("build_scripts.prepare_package.shutil.which", return_value=None): result = build_frontend(tmp_path) assert result is False def test_returns_false_when_package_json_missing(self, tmp_path: Path) -> None: - mock_run = MagicMock() - mock_run.return_value.stdout = "10.0.0\n" - with patch("subprocess.run", mock_run): - result = build_frontend(tmp_path) + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")): + result = build_frontend(tmp_path) assert result is False def test_returns_false_when_npm_install_fails(self, tmp_path: Path) -> None: - import subprocess (tmp_path / "package.json").write_text("{}") responses = [ MagicMock(stdout="10.0.0\n"), subprocess.CalledProcessError(1, "npm install", output="error"), ] - with patch("subprocess.run", side_effect=responses): - result = build_frontend(tmp_path) + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses): + result = build_frontend(tmp_path) assert result is False def test_returns_false_when_npm_build_fails(self, tmp_path: Path) -> None: - import subprocess (tmp_path / "package.json").write_text("{}") responses = [ MagicMock(stdout="10.0.0\n"), MagicMock(), subprocess.CalledProcessError(1, "npm run build", output="error"), ] - with patch("subprocess.run", side_effect=responses): - result = build_frontend(tmp_path) + with patch("build_scripts.prepare_package.shutil.which", 
return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses): + result = build_frontend(tmp_path) assert result is False def test_returns_true_when_build_succeeds(self, tmp_path: Path) -> None: (tmp_path / "package.json").write_text("{}") - with patch("subprocess.run", return_value=MagicMock(stdout="10.0.0\n")): - result = build_frontend(tmp_path) + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")): + result = build_frontend(tmp_path) assert result is True -class TestCopyFrontendToPackage(object): +class TestCopyFrontendToPackage: def test_returns_false_when_dist_missing(self, tmp_path: Path) -> None: result = copy_frontend_to_package(tmp_path / "dist", tmp_path / "out") assert result is False @@ -63,8 +60,7 @@ def test_returns_false_when_index_html_missing(self, tmp_path: Path) -> None: dist = tmp_path / "dist" dist.mkdir() (dist / "main.js").write_text("console.log('hi')") - out = tmp_path / "out" - result = copy_frontend_to_package(dist, out) + result = copy_frontend_to_package(dist, tmp_path / "out") assert result is False def test_returns_true_when_copy_succeeds(self, tmp_path: Path) -> None: From 4ff603b6ec435103136c01dbcc8beb679fc3898d Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Thu, 30 Apr 2026 07:15:32 -0700 Subject: [PATCH 3/3] Address code review findings for build script tests - Refactor generate_rss.py: wrap side effects under if __name__ == '__main__' so the module is cleanly importable without AST exec() workaround - Replace fragile AST exec() loader in test_generate_rss.py with direct import - Convert all test classes to standalone functions per test conventions - Fix test_build_frontend_returns_true_when_build_succeeds: use side_effect with 3 explicit responses and verify call_count == 3 - Strengthen weak assertion in test_resolve_relative_url_resolved 
to check exact path instead of loose substring match - Remove unused imports (tempfile, pytest) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- build_scripts/generate_rss.py | 119 ++++++------- tests/unit/build_scripts/test_check_links.py | 156 ++++++++--------- tests/unit/build_scripts/test_generate_rss.py | 135 +++++++-------- .../build_scripts/test_prepare_package.py | 157 +++++++++-------- .../unit/build_scripts/test_validate_docs.py | 161 +++++++++--------- 5 files changed, 366 insertions(+), 362 deletions(-) diff --git a/build_scripts/generate_rss.py b/build_scripts/generate_rss.py index 4a11fda4b..cb8629ddf 100644 --- a/build_scripts/generate_rss.py +++ b/build_scripts/generate_rss.py @@ -66,62 +66,63 @@ def extract_date_from_filename(filename: str) -> str: return f"{year}-{int(month):02d}-{int(day):02d}" -# Generate the RSS feed structure -print("Generating RSS feed structure...") -fg = FeedGenerator() -fg.link(href="https://microsoft.github.io/PyRIT/blog/rss.xml", rel="self") -fg.title("PyRIT Blog") -fg.description("PyRIT Blog") -fg.logo("https://microsoft.github.io/PyRIT/_static/roakey.png") -fg.language("en") - -# Iterate over the blog source markdown files -print("Pulling blog files...") -if not BLOG_SOURCE_DIR.exists(): - print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. Exiting.") - sys.exit(1) - -files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"] -if len(files) == 0: - print("Error: No blog files found. 
Exiting.") - sys.exit(1) -files.sort(key=lambda x: x.name) - -# Add a feed entry for each file -for file in files: - print(f"Parsing {file.name}...") - fe = fg.add_entry() - # Blog pages are served at blog/ - page_name = file.stem - fe.link(href=f"https://microsoft.github.io/PyRIT/blog/{page_name}") - fe.guid(f"https://microsoft.github.io/PyRIT/blog/{page_name}") - - title, description = parse_blog_markdown(file) - fe.title(title) - fe.description(description) - - pub_date = extract_date_from_filename(file.name) - if pub_date: - fe.pubDate(f"{pub_date}T10:00:00Z") - -# Validating the RSS feed -print("Validating RSS feed...") -first_entry = fg.entry()[-1] -if first_entry.title() != "Multi-Turn orchestrators": - print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.") - sys.exit(1) -expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component." -if not first_entry.description().startswith(expected_desc_start): - print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.") - sys.exit(1) - -# Export the RSS feed -print("Exporting RSS feed...") -RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) -rss_path = RSS_OUTPUT_DIR / "rss.xml" -fg.rss_file(str(rss_path), pretty=True) -if not rss_path.exists() or rss_path.stat().st_size == 0: - print("Error: RSS feed export failed. Exiting.") - sys.exit(1) - -print("RSS feed generated and exported successfully.") +if __name__ == "__main__": + # Generate the RSS feed structure + print("Generating RSS feed structure...") + fg = FeedGenerator() + fg.link(href="https://microsoft.github.io/PyRIT/blog/rss.xml", rel="self") + fg.title("PyRIT Blog") + fg.description("PyRIT Blog") + fg.logo("https://microsoft.github.io/PyRIT/_static/roakey.png") + fg.language("en") + + # Iterate over the blog source markdown files + print("Pulling blog files...") + if not BLOG_SOURCE_DIR.exists(): + print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. 
Exiting.") + sys.exit(1) + + files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"] + if len(files) == 0: + print("Error: No blog files found. Exiting.") + sys.exit(1) + files.sort(key=lambda x: x.name) + + # Add a feed entry for each file + for file in files: + print(f"Parsing {file.name}...") + fe = fg.add_entry() + # Blog pages are served at blog/ + page_name = file.stem + fe.link(href=f"https://microsoft.github.io/PyRIT/blog/{page_name}") + fe.guid(f"https://microsoft.github.io/PyRIT/blog/{page_name}") + + title, description = parse_blog_markdown(file) + fe.title(title) + fe.description(description) + + pub_date = extract_date_from_filename(file.name) + if pub_date: + fe.pubDate(f"{pub_date}T10:00:00Z") + + # Validating the RSS feed + print("Validating RSS feed...") + first_entry = fg.entry()[-1] + if first_entry.title() != "Multi-Turn orchestrators": + print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.") + sys.exit(1) + expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component." + if not first_entry.description().startswith(expected_desc_start): + print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.") + sys.exit(1) + + # Export the RSS feed + print("Exporting RSS feed...") + RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + rss_path = RSS_OUTPUT_DIR / "rss.xml" + fg.rss_file(str(rss_path), pretty=True) + if not rss_path.exists() or rss_path.stat().st_size == 0: + print("Error: RSS feed export failed. Exiting.") + sys.exit(1) + + print("RSS feed generated and exported successfully.") diff --git a/tests/unit/build_scripts/test_check_links.py b/tests/unit/build_scripts/test_check_links.py index 658a6dc4d..75bc4aec4 100644 --- a/tests/unit/build_scripts/test_check_links.py +++ b/tests/unit/build_scripts/test_check_links.py @@ -1,84 +1,88 @@ # Copyright (c) Microsoft Corporation. 
# Licensed under the MIT license. -import tempfile from pathlib import Path -import pytest - from build_scripts.check_links import extract_urls, resolve_relative_url, strip_fragment -class TestStripFragment: - def test_removes_fragment(self) -> None: - assert strip_fragment("https://example.com/page#section") == "https://example.com/page" - - def test_no_fragment_unchanged(self) -> None: - assert strip_fragment("https://example.com/page") == "https://example.com/page" - - def test_empty_fragment(self) -> None: - assert strip_fragment("https://example.com/page#") == "https://example.com/page" - - def test_preserves_query_string(self) -> None: - result = strip_fragment("https://example.com/page?q=1#section") - assert "q=1" in result - assert "section" not in result - - -class TestResolveRelativeUrl: - def test_http_url_unchanged(self) -> None: - url = "https://example.com" - assert resolve_relative_url("/some/file.md", url) == url - - def test_mailto_unchanged(self) -> None: - url = "mailto:test@example.com" - assert resolve_relative_url("/some/file.md", url) == url - - def test_relative_url_resolved(self, tmp_path: Path) -> None: - base = str(tmp_path / "docs" / "file.md") - target = str(tmp_path / "docs" / "other.md") - Path(target).parent.mkdir(parents=True, exist_ok=True) - Path(target).write_text("# Other") - result = resolve_relative_url(base, "other.md") - assert "other" in result - - def test_relative_url_with_md_extension(self, tmp_path: Path) -> None: - base = str(tmp_path / "docs" / "file.md") - target = tmp_path / "docs" / "other.md" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("# Other") - result = resolve_relative_url(base, "other") - assert result.endswith(".md") - - -class TestExtractUrls: - def test_extracts_markdown_links(self, tmp_path: Path) -> None: - f = tmp_path / "test.md" - f.write_text("[Click here](https://example.com)") - urls = extract_urls(str(f)) - assert "https://example.com" in urls - - def 
test_extracts_href_links(self, tmp_path: Path) -> None: - f = tmp_path / "test.html" - f.write_text('link') - urls = extract_urls(str(f)) - assert "https://example.com" in urls - - def test_extracts_src_links(self, tmp_path: Path) -> None: - f = tmp_path / "test.html" - f.write_text('') - urls = extract_urls(str(f)) - assert "https://example.com/image.png" in urls - - def test_empty_file_returns_no_urls(self, tmp_path: Path) -> None: - f = tmp_path / "empty.md" - f.write_text("") - urls = extract_urls(str(f)) - assert urls == [] - - def test_strips_fragments_from_extracted_urls(self, tmp_path: Path) -> None: - f = tmp_path / "test.md" - f.write_text("[link](https://example.com/page#section)") - urls = extract_urls(str(f)) - assert "https://example.com/page" in urls - assert not any("#section" in u for u in urls) +def test_strip_fragment_removes_fragment() -> None: + assert strip_fragment("https://example.com/page#section") == "https://example.com/page" + + +def test_strip_fragment_no_fragment_unchanged() -> None: + assert strip_fragment("https://example.com/page") == "https://example.com/page" + + +def test_strip_fragment_empty_fragment() -> None: + assert strip_fragment("https://example.com/page#") == "https://example.com/page" + + +def test_strip_fragment_preserves_query_string() -> None: + result = strip_fragment("https://example.com/page?q=1#section") + assert "q=1" in result + assert "section" not in result + + +def test_resolve_relative_url_http_url_unchanged() -> None: + url = "https://example.com" + assert resolve_relative_url("/some/file.md", url) == url + + +def test_resolve_relative_url_mailto_unchanged() -> None: + url = "mailto:test@example.com" + assert resolve_relative_url("/some/file.md", url) == url + + +def test_resolve_relative_url_resolved(tmp_path: Path) -> None: + base = str(tmp_path / "docs" / "file.md") + target = tmp_path / "docs" / "other.md" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("# Other") + result = 
resolve_relative_url(base, "other.md") + assert result == str(target) + + +def test_resolve_relative_url_with_md_extension(tmp_path: Path) -> None: + base = str(tmp_path / "docs" / "file.md") + target = tmp_path / "docs" / "other.md" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("# Other") + result = resolve_relative_url(base, "other") + assert result.endswith(".md") + + +def test_extract_urls_extracts_markdown_links(tmp_path: Path) -> None: + f = tmp_path / "test.md" + f.write_text("[Click here](https://example.com)") + urls = extract_urls(str(f)) + assert "https://example.com" in urls + + +def test_extract_urls_extracts_href_links(tmp_path: Path) -> None: + f = tmp_path / "test.html" + f.write_text('link') + urls = extract_urls(str(f)) + assert "https://example.com" in urls + + +def test_extract_urls_extracts_src_links(tmp_path: Path) -> None: + f = tmp_path / "test.html" + f.write_text('') + urls = extract_urls(str(f)) + assert "https://example.com/image.png" in urls + + +def test_extract_urls_empty_file_returns_no_urls(tmp_path: Path) -> None: + f = tmp_path / "empty.md" + f.write_text("") + urls = extract_urls(str(f)) + assert urls == [] + + +def test_extract_urls_strips_fragments(tmp_path: Path) -> None: + f = tmp_path / "test.md" + f.write_text("[link](https://example.com/page#section)") + urls = extract_urls(str(f)) + assert "https://example.com/page" in urls + assert not any("#section" in u for u in urls) diff --git a/tests/unit/build_scripts/test_generate_rss.py b/tests/unit/build_scripts/test_generate_rss.py index 88f9d63e1..b543f82c3 100644 --- a/tests/unit/build_scripts/test_generate_rss.py +++ b/tests/unit/build_scripts/test_generate_rss.py @@ -1,80 +1,63 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import ast -from collections.abc import Callable from pathlib import Path -from typing import Any - - -def _load_generate_rss_functions() -> tuple[Callable[[str], str], Callable[[Path], tuple[str, str]]]: - """Load generate_rss helpers without executing the script body.""" - script_path = Path(__file__).resolve().parents[3] / "build_scripts" / "generate_rss.py" - source = script_path.read_text(encoding="utf-8") - parsed_module = ast.parse(source, filename=str(script_path)) - target_functions = {"extract_date_from_filename", "parse_blog_markdown"} - selected_nodes: list[ast.stmt] = [] - for node in parsed_module.body: - if isinstance(node, (ast.Import, ast.ImportFrom)): - selected_nodes.append(node) - continue - if isinstance(node, ast.FunctionDef) and node.name in target_functions: - selected_nodes.append(node) - safe_module = ast.Module(body=selected_nodes, type_ignores=[]) - namespace: dict[str, Any] = {} - exec(compile(safe_module, filename=str(script_path), mode="exec"), namespace) - return namespace["extract_date_from_filename"], namespace["parse_blog_markdown"] - - -extract_date_from_filename, parse_blog_markdown = _load_generate_rss_functions() - - -class TestExtractDateFromFilename: - def test_standard_date(self) -> None: - assert extract_date_from_filename("2024_12_3.md") == "2024-12-03" - - def test_double_digit_day_and_month(self) -> None: - assert extract_date_from_filename("2023_11_25.md") == "2023-11-25" - - def test_single_digit_month(self) -> None: - assert extract_date_from_filename("2024_1_15.md") == "2024-01-15" - - def test_returns_empty_for_invalid_filename(self) -> None: - assert extract_date_from_filename("no_date_here.md") == "" - - def test_returns_empty_for_non_numeric(self) -> None: - assert extract_date_from_filename("intro.md") == "" - - -class TestParseBlogMarkdown: - def test_extracts_title(self, tmp_path: Path) -> None: - f = tmp_path / "2024_01_01.md" - f.write_text("# My Blog Title\n\nSome description here.") - title, _ = 
parse_blog_markdown(f)
-        assert title == "My Blog Title"
-
-    def test_extracts_description(self, tmp_path: Path) -> None:
-        f = tmp_path / "2024_01_01.md"
-        f.write_text("# Title\n\nThis is the description paragraph.")
-        _, desc = parse_blog_markdown(f)
-        assert "This is the description paragraph." in desc
-
-    def test_skips_small_tag_in_description(self, tmp_path: Path) -> None:
-        f = tmp_path / "2024_01_01.md"
-        f.write_text("# Title\n\n<small>date info</small>\n\nReal description here.")
-        _, desc = parse_blog_markdown(f)
-        assert "small" not in desc
-        assert "Real description here." in desc
-
-    def test_empty_title_when_no_heading(self, tmp_path: Path) -> None:
-        f = tmp_path / "2024_01_01.md"
-        f.write_text("No heading here.\n\nJust paragraphs.")
-        title, _ = parse_blog_markdown(f)
-        assert title == ""
-
-    def test_multiline_description_joined(self, tmp_path: Path) -> None:
-        f = tmp_path / "2024_01_01.md"
-        f.write_text("# Title\n\nLine one.\nLine two.")
-        _, desc = parse_blog_markdown(f)
-        assert "Line one." in desc
-        assert "Line two."
in desc
+
+from build_scripts.generate_rss import extract_date_from_filename, parse_blog_markdown
+
+
+def test_extract_date_from_filename_standard_date() -> None:
+    assert extract_date_from_filename("2024_12_3.md") == "2024-12-03"
+
+
+def test_extract_date_from_filename_double_digit_day_and_month() -> None:
+    assert extract_date_from_filename("2023_11_25.md") == "2023-11-25"
+
+
+def test_extract_date_from_filename_single_digit_month() -> None:
+    assert extract_date_from_filename("2024_1_15.md") == "2024-01-15"
+
+
+def test_extract_date_from_filename_returns_empty_for_invalid() -> None:
+    assert extract_date_from_filename("no_date_here.md") == ""
+
+
+def test_extract_date_from_filename_returns_empty_for_non_numeric() -> None:
+    assert extract_date_from_filename("intro.md") == ""
+
+
+def test_parse_blog_markdown_extracts_title(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# My Blog Title\n\nSome description here.")
+    title, _ = parse_blog_markdown(f)
+    assert title == "My Blog Title"
+
+
+def test_parse_blog_markdown_extracts_description(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# Title\n\nThis is the description paragraph.")
+    _, desc = parse_blog_markdown(f)
+    assert "This is the description paragraph." in desc
+
+
+def test_parse_blog_markdown_skips_small_tag(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# Title\n\n<small>date info</small>\n\nReal description here.")
+    _, desc = parse_blog_markdown(f)
+    assert "small" not in desc
+    assert "Real description here."
in desc + + +def test_parse_blog_markdown_empty_title_when_no_heading(tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("No heading here.\n\nJust paragraphs.") + title, _ = parse_blog_markdown(f) + assert title == "" + + +def test_parse_blog_markdown_multiline_description_joined(tmp_path: Path) -> None: + f = tmp_path / "2024_01_01.md" + f.write_text("# Title\n\nLine one.\nLine two.") + _, desc = parse_blog_markdown(f) + assert "Line one." in desc + assert "Line two." in desc diff --git a/tests/unit/build_scripts/test_prepare_package.py b/tests/unit/build_scripts/test_prepare_package.py index 53f5026c7..8dde85147 100644 --- a/tests/unit/build_scripts/test_prepare_package.py +++ b/tests/unit/build_scripts/test_prepare_package.py @@ -8,77 +8,88 @@ from build_scripts.prepare_package import build_frontend, copy_frontend_to_package -class TestBuildFrontend: - def test_returns_false_when_npm_not_found(self, tmp_path: Path) -> None: - with patch("build_scripts.prepare_package.shutil.which", return_value=None): +def test_build_frontend_returns_false_when_npm_not_found(tmp_path: Path) -> None: + with patch("build_scripts.prepare_package.shutil.which", return_value=None): + result = build_frontend(tmp_path) + assert result is False + + +def test_build_frontend_returns_false_when_package_json_missing(tmp_path: Path) -> None: + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")): + result = build_frontend(tmp_path) + assert result is False + + +def test_build_frontend_returns_false_when_npm_install_fails(tmp_path: Path) -> None: + (tmp_path / "package.json").write_text("{}") + responses = [ + MagicMock(stdout="10.0.0\n"), + subprocess.CalledProcessError(1, "npm install", output="error"), + ] + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with 
patch("build_scripts.prepare_package.subprocess.run", side_effect=responses): result = build_frontend(tmp_path) - assert result is False - - def test_returns_false_when_package_json_missing(self, tmp_path: Path) -> None: - with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): - with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")): - result = build_frontend(tmp_path) - assert result is False - - def test_returns_false_when_npm_install_fails(self, tmp_path: Path) -> None: - (tmp_path / "package.json").write_text("{}") - responses = [ - MagicMock(stdout="10.0.0\n"), - subprocess.CalledProcessError(1, "npm install", output="error"), - ] - with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): - with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses): - result = build_frontend(tmp_path) - assert result is False - - def test_returns_false_when_npm_build_fails(self, tmp_path: Path) -> None: - (tmp_path / "package.json").write_text("{}") - responses = [ - MagicMock(stdout="10.0.0\n"), - MagicMock(), - subprocess.CalledProcessError(1, "npm run build", output="error"), - ] - with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): - with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses): - result = build_frontend(tmp_path) - assert result is False - - def test_returns_true_when_build_succeeds(self, tmp_path: Path) -> None: - (tmp_path / "package.json").write_text("{}") - with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): - with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")): - result = build_frontend(tmp_path) - assert result is True - - -class TestCopyFrontendToPackage: - def test_returns_false_when_dist_missing(self, tmp_path: Path) -> None: - result = copy_frontend_to_package(tmp_path / "dist", 
tmp_path / "out") - assert result is False - - def test_returns_false_when_index_html_missing(self, tmp_path: Path) -> None: - dist = tmp_path / "dist" - dist.mkdir() - (dist / "main.js").write_text("console.log('hi')") - result = copy_frontend_to_package(dist, tmp_path / "out") - assert result is False - - def test_returns_true_when_copy_succeeds(self, tmp_path: Path) -> None: - dist = tmp_path / "dist" - dist.mkdir() - (dist / "index.html").write_text("") - out = tmp_path / "out" - result = copy_frontend_to_package(dist, out) - assert result is True - assert (out / "index.html").exists() - - def test_removes_existing_output_dir(self, tmp_path: Path) -> None: - dist = tmp_path / "dist" - dist.mkdir() - (dist / "index.html").write_text("") - out = tmp_path / "out" - out.mkdir() - (out / "old_file.txt").write_text("old") - copy_frontend_to_package(dist, out) - assert not (out / "old_file.txt").exists() - assert (out / "index.html").exists() + assert result is False + + +def test_build_frontend_returns_false_when_npm_build_fails(tmp_path: Path) -> None: + (tmp_path / "package.json").write_text("{}") + responses = [ + MagicMock(stdout="10.0.0\n"), + MagicMock(), + subprocess.CalledProcessError(1, "npm run build", output="error"), + ] + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses): + result = build_frontend(tmp_path) + assert result is False + + +def test_build_frontend_returns_true_when_build_succeeds(tmp_path: Path) -> None: + (tmp_path / "package.json").write_text("{}") + responses = [ + MagicMock(stdout="10.0.0\n"), # npm --version + MagicMock(), # npm install + MagicMock(), # npm run build + ] + with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"): + with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses) as mock_run: + result = build_frontend(tmp_path) + assert result is True + 
assert mock_run.call_count == 3 + + +def test_copy_frontend_returns_false_when_dist_missing(tmp_path: Path) -> None: + result = copy_frontend_to_package(tmp_path / "dist", tmp_path / "out") + assert result is False + + +def test_copy_frontend_returns_false_when_index_html_missing(tmp_path: Path) -> None: + dist = tmp_path / "dist" + dist.mkdir() + (dist / "main.js").write_text("console.log('hi')") + result = copy_frontend_to_package(dist, tmp_path / "out") + assert result is False + + +def test_copy_frontend_returns_true_when_copy_succeeds(tmp_path: Path) -> None: + dist = tmp_path / "dist" + dist.mkdir() + (dist / "index.html").write_text("") + out = tmp_path / "out" + result = copy_frontend_to_package(dist, out) + assert result is True + assert (out / "index.html").exists() + + +def test_copy_frontend_removes_existing_output_dir(tmp_path: Path) -> None: + dist = tmp_path / "dist" + dist.mkdir() + (dist / "index.html").write_text("") + out = tmp_path / "out" + out.mkdir() + (out / "old_file.txt").write_text("old") + copy_frontend_to_package(dist, out) + assert not (out / "old_file.txt").exists() + assert (out / "index.html").exists() diff --git a/tests/unit/build_scripts/test_validate_docs.py b/tests/unit/build_scripts/test_validate_docs.py index bea434a49..7a048a8cc 100644 --- a/tests/unit/build_scripts/test_validate_docs.py +++ b/tests/unit/build_scripts/test_validate_docs.py @@ -1,86 +1,91 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import tempfile from pathlib import Path -import pytest - from build_scripts.validate_docs import find_orphaned_files, parse_toc_files, validate_toc_files -class TestParseTocFiles: - def test_extracts_single_file(self) -> None: - toc = [{"file": "intro"}] - result = parse_toc_files(toc) - assert "intro" in result - - def test_extracts_nested_children(self) -> None: - toc = [{"file": "parent", "children": [{"file": "child"}]}] - result = parse_toc_files(toc) - assert "parent" in result - assert "child" in result - - def test_ignores_entries_without_file(self) -> None: - toc = [{"title": "No file here"}] - result = parse_toc_files(toc) - assert len(result) == 0 - - def test_empty_toc(self) -> None: - result = parse_toc_files([]) - assert result == set() - - def test_normalizes_backslashes(self) -> None: - toc = [{"file": "setup\\install"}] - result = parse_toc_files(toc) - assert "setup/install" in result - - -class TestValidateTocFiles: - def test_no_errors_when_files_exist(self, tmp_path: Path) -> None: - (tmp_path / "intro.md").write_text("# Intro") - errors = validate_toc_files({"intro.md"}, tmp_path) - assert errors == [] - - def test_error_when_file_missing(self, tmp_path: Path) -> None: - errors = validate_toc_files({"missing.md"}, tmp_path) - assert len(errors) == 1 - assert "missing.md" in errors[0] - - def test_skips_api_generated_files(self, tmp_path: Path) -> None: - errors = validate_toc_files({"api/some_module"}, tmp_path) - assert errors == [] - - def test_multiple_missing_files(self, tmp_path: Path) -> None: - errors = validate_toc_files({"a.md", "b.md"}, tmp_path) - assert len(errors) == 2 - - -class TestFindOrphanedFiles: - def test_no_orphans_when_all_referenced(self, tmp_path: Path) -> None: - (tmp_path / "intro.md").write_text("# Intro") - orphaned = find_orphaned_files({"intro.md"}, tmp_path) - assert orphaned == [] - - def test_detects_orphaned_markdown(self, tmp_path: Path) -> None: - (tmp_path / "orphan.md").write_text("# Orphan") - orphaned 
= find_orphaned_files(set(), tmp_path) - assert any("orphan.md" in o for o in orphaned) - - def test_skips_build_directory(self, tmp_path: Path) -> None: - build_dir = tmp_path / "_build" - build_dir.mkdir() - (build_dir / "generated.md").write_text("# Generated") - orphaned = find_orphaned_files(set(), tmp_path) - assert not any("_build" in o for o in orphaned) - - def test_skips_myst_yml(self, tmp_path: Path) -> None: - (tmp_path / "myst.yml").write_text("project:") - orphaned = find_orphaned_files(set(), tmp_path) - assert not any("myst.yml" in o for o in orphaned) - - def test_skips_py_companion_files(self, tmp_path: Path) -> None: - (tmp_path / "notebook.ipynb").write_text("{}") - (tmp_path / "notebook.py").write_text("# companion") - orphaned = find_orphaned_files(set(), tmp_path) - assert not any("notebook.py" in o for o in orphaned) +def test_parse_toc_files_extracts_single_file() -> None: + toc = [{"file": "intro"}] + result = parse_toc_files(toc) + assert "intro" in result + + +def test_parse_toc_files_extracts_nested_children() -> None: + toc = [{"file": "parent", "children": [{"file": "child"}]}] + result = parse_toc_files(toc) + assert "parent" in result + assert "child" in result + + +def test_parse_toc_files_ignores_entries_without_file() -> None: + toc = [{"title": "No file here"}] + result = parse_toc_files(toc) + assert len(result) == 0 + + +def test_parse_toc_files_empty_toc() -> None: + result = parse_toc_files([]) + assert result == set() + + +def test_parse_toc_files_normalizes_backslashes() -> None: + toc = [{"file": "setup\\install"}] + result = parse_toc_files(toc) + assert "setup/install" in result + + +def test_validate_toc_files_no_errors_when_files_exist(tmp_path: Path) -> None: + (tmp_path / "intro.md").write_text("# Intro") + errors = validate_toc_files({"intro.md"}, tmp_path) + assert errors == [] + + +def test_validate_toc_files_error_when_file_missing(tmp_path: Path) -> None: + errors = validate_toc_files({"missing.md"}, tmp_path) 
+ assert len(errors) == 1 + assert "missing.md" in errors[0] + + +def test_validate_toc_files_skips_api_generated_files(tmp_path: Path) -> None: + errors = validate_toc_files({"api/some_module"}, tmp_path) + assert errors == [] + + +def test_validate_toc_files_multiple_missing_files(tmp_path: Path) -> None: + errors = validate_toc_files({"a.md", "b.md"}, tmp_path) + assert len(errors) == 2 + + +def test_find_orphaned_files_no_orphans_when_all_referenced(tmp_path: Path) -> None: + (tmp_path / "intro.md").write_text("# Intro") + orphaned = find_orphaned_files({"intro.md"}, tmp_path) + assert orphaned == [] + + +def test_find_orphaned_files_detects_orphaned_markdown(tmp_path: Path) -> None: + (tmp_path / "orphan.md").write_text("# Orphan") + orphaned = find_orphaned_files(set(), tmp_path) + assert any("orphan.md" in o for o in orphaned) + + +def test_find_orphaned_files_skips_build_directory(tmp_path: Path) -> None: + build_dir = tmp_path / "_build" + build_dir.mkdir() + (build_dir / "generated.md").write_text("# Generated") + orphaned = find_orphaned_files(set(), tmp_path) + assert not any("_build" in o for o in orphaned) + + +def test_find_orphaned_files_skips_myst_yml(tmp_path: Path) -> None: + (tmp_path / "myst.yml").write_text("project:") + orphaned = find_orphaned_files(set(), tmp_path) + assert not any("myst.yml" in o for o in orphaned) + + +def test_find_orphaned_files_skips_py_companion_files(tmp_path: Path) -> None: + (tmp_path / "notebook.ipynb").write_text("{}") + (tmp_path / "notebook.py").write_text("# companion") + orphaned = find_orphaned_files(set(), tmp_path) + assert not any("notebook.py" in o for o in orphaned)