diff --git a/build_scripts/generate_rss.py b/build_scripts/generate_rss.py
index 4a11fda4b..cb8629ddf 100644
--- a/build_scripts/generate_rss.py
+++ b/build_scripts/generate_rss.py
@@ -66,62 +66,63 @@ def extract_date_from_filename(filename: str) -> str:
     return f"{year}-{int(month):02d}-{int(day):02d}"
 
-# Generate the RSS feed structure
-print("Generating RSS feed structure...")
-fg = FeedGenerator()
-fg.link(href="https://microsoft.github.io/PyRIT/blog/rss.xml", rel="self")
-fg.title("PyRIT Blog")
-fg.description("PyRIT Blog")
-fg.logo("https://microsoft.github.io/PyRIT/_static/roakey.png")
-fg.language("en")
-
-# Iterate over the blog source markdown files
-print("Pulling blog files...")
-if not BLOG_SOURCE_DIR.exists():
-    print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. Exiting.")
-    sys.exit(1)
-
-files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"]
-if len(files) == 0:
-    print("Error: No blog files found. Exiting.")
-    sys.exit(1)
-files.sort(key=lambda x: x.name)
-
-# Add a feed entry for each file
-for file in files:
-    print(f"Parsing {file.name}...")
-    fe = fg.add_entry()
-    # Blog pages are served at blog/
-    page_name = file.stem
-    fe.link(href=f"https://microsoft.github.io/PyRIT/blog/{page_name}")
-    fe.guid(f"https://microsoft.github.io/PyRIT/blog/{page_name}")
-
-    title, description = parse_blog_markdown(file)
-    fe.title(title)
-    fe.description(description)
-
-    pub_date = extract_date_from_filename(file.name)
-    if pub_date:
-        fe.pubDate(f"{pub_date}T10:00:00Z")
-
-# Validating the RSS feed
-print("Validating RSS feed...")
-first_entry = fg.entry()[-1]
-if first_entry.title() != "Multi-Turn orchestrators":
-    print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.")
-    sys.exit(1)
-expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component."
-if not first_entry.description().startswith(expected_desc_start):
-    print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.")
-    sys.exit(1)
-
-# Export the RSS feed
-print("Exporting RSS feed...")
-RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
-rss_path = RSS_OUTPUT_DIR / "rss.xml"
-fg.rss_file(str(rss_path), pretty=True)
-if not rss_path.exists() or rss_path.stat().st_size == 0:
-    print("Error: RSS feed export failed. Exiting.")
-    sys.exit(1)
-
-print("RSS feed generated and exported successfully.")
+if __name__ == "__main__":
+    # Generate the RSS feed structure
+    print("Generating RSS feed structure...")
+    fg = FeedGenerator()
+    fg.link(href="https://microsoft.github.io/PyRIT/blog/rss.xml", rel="self")
+    fg.title("PyRIT Blog")
+    fg.description("PyRIT Blog")
+    fg.logo("https://microsoft.github.io/PyRIT/_static/roakey.png")
+    fg.language("en")
+
+    # Iterate over the blog source markdown files
+    print("Pulling blog files...")
+    if not BLOG_SOURCE_DIR.exists():
+        print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. Exiting.")
+        sys.exit(1)
+
+    files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"]
+    if len(files) == 0:
+        print("Error: No blog files found. Exiting.")
+        sys.exit(1)
+    files.sort(key=lambda x: x.name)
+
+    # Add a feed entry for each file
+    for file in files:
+        print(f"Parsing {file.name}...")
+        fe = fg.add_entry()
+        # Blog pages are served at blog/
+        page_name = file.stem
+        fe.link(href=f"https://microsoft.github.io/PyRIT/blog/{page_name}")
+        fe.guid(f"https://microsoft.github.io/PyRIT/blog/{page_name}")
+
+        title, description = parse_blog_markdown(file)
+        fe.title(title)
+        fe.description(description)
+
+        pub_date = extract_date_from_filename(file.name)
+        if pub_date:
+            fe.pubDate(f"{pub_date}T10:00:00Z")
+
+    # Validate the RSS feed
+    print("Validating RSS feed...")
+    first_entry = fg.entry()[-1]
+    if first_entry.title() != "Multi-Turn orchestrators":
+        print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.")
+        sys.exit(1)
+    expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component."
+    if not first_entry.description().startswith(expected_desc_start):
+        print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.")
+        sys.exit(1)
+
+    # Export the RSS feed
+    print("Exporting RSS feed...")
+    RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    rss_path = RSS_OUTPUT_DIR / "rss.xml"
+    fg.rss_file(str(rss_path), pretty=True)
+    if not rss_path.exists() or rss_path.stat().st_size == 0:
+        print("Error: RSS feed export failed. Exiting.")
+        sys.exit(1)
+
+    print("RSS feed generated and exported successfully.")
 
diff --git a/tests/unit/build_scripts/test_check_links.py b/tests/unit/build_scripts/test_check_links.py
new file mode 100644
index 000000000..75bc4aec4
--- /dev/null
+++ b/tests/unit/build_scripts/test_check_links.py
@@ -0,0 +1,88 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from pathlib import Path
+
+from build_scripts.check_links import extract_urls, resolve_relative_url, strip_fragment
+
+
+def test_strip_fragment_removes_fragment() -> None:
+    assert strip_fragment("https://example.com/page#section") == "https://example.com/page"
+
+
+def test_strip_fragment_no_fragment_unchanged() -> None:
+    assert strip_fragment("https://example.com/page") == "https://example.com/page"
+
+
+def test_strip_fragment_empty_fragment() -> None:
+    assert strip_fragment("https://example.com/page#") == "https://example.com/page"
+
+
+def test_strip_fragment_preserves_query_string() -> None:
+    result = strip_fragment("https://example.com/page?q=1#section")
+    assert "q=1" in result
+    assert "section" not in result
+
+
+def test_resolve_relative_url_http_url_unchanged() -> None:
+    url = "https://example.com"
+    assert resolve_relative_url("/some/file.md", url) == url
+
+
+def test_resolve_relative_url_mailto_unchanged() -> None:
+    url = "mailto:test@example.com"
+    assert resolve_relative_url("/some/file.md", url) == url
+
+
+def test_resolve_relative_url_resolved(tmp_path: Path) -> None:
+    base = str(tmp_path / "docs" / "file.md")
+    target = tmp_path / "docs" / "other.md"
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text("# Other")
+    result = resolve_relative_url(base, "other.md")
+    assert result == str(target)
+
+
+def test_resolve_relative_url_with_md_extension(tmp_path: Path) -> None:
+    base = str(tmp_path / "docs" / "file.md")
+    target = tmp_path / "docs" / "other.md"
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text("# Other")
+    result = resolve_relative_url(base, "other")
+    assert result.endswith(".md")
+
+
+def test_extract_urls_extracts_markdown_links(tmp_path: Path) -> None:
+    f = tmp_path / "test.md"
+    f.write_text("[Click here](https://example.com)")
+    urls = extract_urls(str(f))
+    assert "https://example.com" in urls
+
+
+def test_extract_urls_extracts_href_links(tmp_path: Path) -> None:
+    f = tmp_path / "test.html"
+    f.write_text('<a href="https://example.com">link</a>')
+    urls = extract_urls(str(f))
+    assert "https://example.com" in urls
+
+
+def test_extract_urls_extracts_src_links(tmp_path: Path) -> None:
+    f = tmp_path / "test.html"
+    f.write_text('<img src="https://example.com/image.png">')
+    urls = extract_urls(str(f))
+    assert "https://example.com/image.png" in urls
+
+
+def test_extract_urls_empty_file_returns_no_urls(tmp_path: Path) -> None:
+    f = tmp_path / "empty.md"
+    f.write_text("")
+    urls = extract_urls(str(f))
+    assert urls == []
+
+
+def test_extract_urls_strips_fragments(tmp_path: Path) -> None:
+    f = tmp_path / "test.md"
+    f.write_text("[link](https://example.com/page#section)")
+    urls = extract_urls(str(f))
+    assert "https://example.com/page" in urls
+    assert not any("#section" in u for u in urls)
diff --git a/tests/unit/build_scripts/test_generate_rss.py b/tests/unit/build_scripts/test_generate_rss.py
new file mode 100644
index 000000000..b543f82c3
--- /dev/null
+++ b/tests/unit/build_scripts/test_generate_rss.py
@@ -0,0 +1,63 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from pathlib import Path
+
+from build_scripts.generate_rss import extract_date_from_filename, parse_blog_markdown
+
+
+def test_extract_date_from_filename_standard_date() -> None:
+    assert extract_date_from_filename("2024_12_3.md") == "2024-12-03"
+
+
+def test_extract_date_from_filename_double_digit_day_and_month() -> None:
+    assert extract_date_from_filename("2023_11_25.md") == "2023-11-25"
+
+
+def test_extract_date_from_filename_single_digit_month() -> None:
+    assert extract_date_from_filename("2024_1_15.md") == "2024-01-15"
+
+
+def test_extract_date_from_filename_returns_empty_for_invalid() -> None:
+    assert extract_date_from_filename("no_date_here.md") == ""
+
+
+def test_extract_date_from_filename_returns_empty_for_non_numeric() -> None:
+    assert extract_date_from_filename("intro.md") == ""
+
+
+def test_parse_blog_markdown_extracts_title(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# My Blog Title\n\nSome description here.")
+    title, _ = parse_blog_markdown(f)
+    assert title == "My Blog Title"
+
+
+def test_parse_blog_markdown_extracts_description(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# Title\n\nThis is the description paragraph.")
+    _, desc = parse_blog_markdown(f)
+    assert "This is the description paragraph." in desc
+
+
+def test_parse_blog_markdown_skips_small_tag(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# Title\n\n<small>date info</small>\n\nReal description here.")
+    _, desc = parse_blog_markdown(f)
+    assert "small" not in desc
+    assert "Real description here." in desc
+
+
+def test_parse_blog_markdown_empty_title_when_no_heading(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("No heading here.\n\nJust paragraphs.")
+    title, _ = parse_blog_markdown(f)
+    assert title == ""
+
+
+def test_parse_blog_markdown_multiline_description_joined(tmp_path: Path) -> None:
+    f = tmp_path / "2024_01_01.md"
+    f.write_text("# Title\n\nLine one.\nLine two.")
+    _, desc = parse_blog_markdown(f)
+    assert "Line one." in desc
+    assert "Line two." in desc
diff --git a/tests/unit/build_scripts/test_prepare_package.py b/tests/unit/build_scripts/test_prepare_package.py
new file mode 100644
index 000000000..8dde85147
--- /dev/null
+++ b/tests/unit/build_scripts/test_prepare_package.py
@@ -0,0 +1,95 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import subprocess
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from build_scripts.prepare_package import build_frontend, copy_frontend_to_package
+
+
+def test_build_frontend_returns_false_when_npm_not_found(tmp_path: Path) -> None:
+    with patch("build_scripts.prepare_package.shutil.which", return_value=None):
+        result = build_frontend(tmp_path)
+    assert result is False
+
+
+def test_build_frontend_returns_false_when_package_json_missing(tmp_path: Path) -> None:
+    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
+        with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")):
+            result = build_frontend(tmp_path)
+    assert result is False
+
+
+def test_build_frontend_returns_false_when_npm_install_fails(tmp_path: Path) -> None:
+    (tmp_path / "package.json").write_text("{}")
+    responses = [
+        MagicMock(stdout="10.0.0\n"),
+        subprocess.CalledProcessError(1, "npm install", output="error"),
+    ]
+    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
+        with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses):
+            result = build_frontend(tmp_path)
+    assert result is False
+
+
+def test_build_frontend_returns_false_when_npm_build_fails(tmp_path: Path) -> None:
+    (tmp_path / "package.json").write_text("{}")
+    responses = [
+        MagicMock(stdout="10.0.0\n"),
+        MagicMock(),
+        subprocess.CalledProcessError(1, "npm run build", output="error"),
+    ]
+    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
+        with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses):
+            result = build_frontend(tmp_path)
+    assert result is False
+
+
+def test_build_frontend_returns_true_when_build_succeeds(tmp_path: Path) -> None:
+    (tmp_path / "package.json").write_text("{}")
+    responses = [
+        MagicMock(stdout="10.0.0\n"),  # npm --version
+        MagicMock(),  # npm install
+        MagicMock(),  # npm run build
+    ]
+    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
+        with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses) as mock_run:
+            result = build_frontend(tmp_path)
+    assert result is True
+    assert mock_run.call_count == 3
+
+
+def test_copy_frontend_returns_false_when_dist_missing(tmp_path: Path) -> None:
+    result = copy_frontend_to_package(tmp_path / "dist", tmp_path / "out")
+    assert result is False
+
+
+def test_copy_frontend_returns_false_when_index_html_missing(tmp_path: Path) -> None:
+    dist = tmp_path / "dist"
+    dist.mkdir()
+    (dist / "main.js").write_text("console.log('hi')")
+    result = copy_frontend_to_package(dist, tmp_path / "out")
+    assert result is False
+
+
+def test_copy_frontend_returns_true_when_copy_succeeds(tmp_path: Path) -> None:
+    dist = tmp_path / "dist"
+    dist.mkdir()
+    (dist / "index.html").write_text("<html></html>")
+    out = tmp_path / "out"
+    result = copy_frontend_to_package(dist, out)
+    assert result is True
+    assert (out / "index.html").exists()
+
+
+def test_copy_frontend_removes_existing_output_dir(tmp_path: Path) -> None:
+    dist = tmp_path / "dist"
+    dist.mkdir()
+    (dist / "index.html").write_text("<html></html>")
+    out = tmp_path / "out"
+    out.mkdir()
+    (out / "old_file.txt").write_text("old")
+    copy_frontend_to_package(dist, out)
+    assert not (out / "old_file.txt").exists()
+    assert (out / "index.html").exists()
diff --git a/tests/unit/build_scripts/test_validate_docs.py b/tests/unit/build_scripts/test_validate_docs.py
new file mode 100644
index 000000000..7a048a8cc
--- /dev/null
+++ b/tests/unit/build_scripts/test_validate_docs.py
@@ -0,0 +1,91 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from pathlib import Path
+
+from build_scripts.validate_docs import find_orphaned_files, parse_toc_files, validate_toc_files
+
+
+def test_parse_toc_files_extracts_single_file() -> None:
+    toc = [{"file": "intro"}]
+    result = parse_toc_files(toc)
+    assert "intro" in result
+
+
+def test_parse_toc_files_extracts_nested_children() -> None:
+    toc = [{"file": "parent", "children": [{"file": "child"}]}]
+    result = parse_toc_files(toc)
+    assert "parent" in result
+    assert "child" in result
+
+
+def test_parse_toc_files_ignores_entries_without_file() -> None:
+    toc = [{"title": "No file here"}]
+    result = parse_toc_files(toc)
+    assert len(result) == 0
+
+
+def test_parse_toc_files_empty_toc() -> None:
+    result = parse_toc_files([])
+    assert result == set()
+
+
+def test_parse_toc_files_normalizes_backslashes() -> None:
+    toc = [{"file": "setup\\install"}]
+    result = parse_toc_files(toc)
+    assert "setup/install" in result
+
+
+def test_validate_toc_files_no_errors_when_files_exist(tmp_path: Path) -> None:
+    (tmp_path / "intro.md").write_text("# Intro")
+    errors = validate_toc_files({"intro.md"}, tmp_path)
+    assert errors == []
+
+
+def test_validate_toc_files_error_when_file_missing(tmp_path: Path) -> None:
+    errors = validate_toc_files({"missing.md"}, tmp_path)
+    assert len(errors) == 1
+    assert "missing.md" in errors[0]
+
+
+def test_validate_toc_files_skips_api_generated_files(tmp_path: Path) -> None:
+    errors = validate_toc_files({"api/some_module"}, tmp_path)
+    assert errors == []
+
+
+def test_validate_toc_files_multiple_missing_files(tmp_path: Path) -> None:
+    errors = validate_toc_files({"a.md", "b.md"}, tmp_path)
+    assert len(errors) == 2
+
+
+def test_find_orphaned_files_no_orphans_when_all_referenced(tmp_path: Path) -> None:
+    (tmp_path / "intro.md").write_text("# Intro")
+    orphaned = find_orphaned_files({"intro.md"}, tmp_path)
+    assert orphaned == []
+
+
+def test_find_orphaned_files_detects_orphaned_markdown(tmp_path: Path) -> None:
+    (tmp_path / "orphan.md").write_text("# Orphan")
+    orphaned = find_orphaned_files(set(), tmp_path)
+    assert any("orphan.md" in o for o in orphaned)
+
+
+def test_find_orphaned_files_skips_build_directory(tmp_path: Path) -> None:
+    build_dir = tmp_path / "_build"
+    build_dir.mkdir()
+    (build_dir / "generated.md").write_text("# Generated")
+    orphaned = find_orphaned_files(set(), tmp_path)
+    assert not any("_build" in o for o in orphaned)
+
+
+def test_find_orphaned_files_skips_myst_yml(tmp_path: Path) -> None:
+    (tmp_path / "myst.yml").write_text("project:")
+    orphaned = find_orphaned_files(set(), tmp_path)
+    assert not any("myst.yml" in o for o in orphaned)
+
+
+def test_find_orphaned_files_skips_py_companion_files(tmp_path: Path) -> None:
+    (tmp_path / "notebook.ipynb").write_text("{}")
+    (tmp_path / "notebook.py").write_text("# companion")
+    orphaned = find_orphaned_files(set(), tmp_path)
+    assert not any("notebook.py" in o for o in orphaned)