119 changes: 60 additions & 59 deletions build_scripts/generate_rss.py
@@ -66,62 +66,63 @@ def extract_date_from_filename(filename: str) -> str:
     return f"{year}-{int(month):02d}-{int(day):02d}"
 
 
-# Generate the RSS feed structure
-print("Generating RSS feed structure...")
-fg = FeedGenerator()
-fg.link(href="https://microsoft.github.io/PyRIT/blog/rss.xml", rel="self")
-fg.title("PyRIT Blog")
-fg.description("PyRIT Blog")
-fg.logo("https://microsoft.github.io/PyRIT/_static/roakey.png")
-fg.language("en")
-
-# Iterate over the blog source markdown files
-print("Pulling blog files...")
-if not BLOG_SOURCE_DIR.exists():
-    print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. Exiting.")
-    sys.exit(1)
-
-files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"]
-if len(files) == 0:
-    print("Error: No blog files found. Exiting.")
-    sys.exit(1)
-files.sort(key=lambda x: x.name)
-
-# Add a feed entry for each file
-for file in files:
-    print(f"Parsing {file.name}...")
-    fe = fg.add_entry()
-    # Blog pages are served at blog/<filename_without_ext>
-    page_name = file.stem
-    fe.link(href=f"https://microsoft.github.io/PyRIT/blog/{page_name}")
-    fe.guid(f"https://microsoft.github.io/PyRIT/blog/{page_name}")
-
-    title, description = parse_blog_markdown(file)
-    fe.title(title)
-    fe.description(description)
-
-    pub_date = extract_date_from_filename(file.name)
-    if pub_date:
-        fe.pubDate(f"{pub_date}T10:00:00Z")
-
-# Validating the RSS feed
-print("Validating RSS feed...")
-first_entry = fg.entry()[-1]
-if first_entry.title() != "Multi-Turn orchestrators":
-    print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.")
-    sys.exit(1)
-expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component."
-if not first_entry.description().startswith(expected_desc_start):
-    print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.")
-    sys.exit(1)
-
-# Export the RSS feed
-print("Exporting RSS feed...")
-RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
-rss_path = RSS_OUTPUT_DIR / "rss.xml"
-fg.rss_file(str(rss_path), pretty=True)
-if not rss_path.exists() or rss_path.stat().st_size == 0:
-    print("Error: RSS feed export failed. Exiting.")
-    sys.exit(1)
-
-print("RSS feed generated and exported successfully.")
+if __name__ == "__main__":
+    # Generate the RSS feed structure
+    print("Generating RSS feed structure...")
+    fg = FeedGenerator()
+    fg.link(href="https://microsoft.github.io/PyRIT/blog/rss.xml", rel="self")
+    fg.title("PyRIT Blog")
+    fg.description("PyRIT Blog")
+    fg.logo("https://microsoft.github.io/PyRIT/_static/roakey.png")
+    fg.language("en")
+
+    # Iterate over the blog source markdown files
+    print("Pulling blog files...")
+    if not BLOG_SOURCE_DIR.exists():
+        print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. Exiting.")
+        sys.exit(1)
+
+    files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"]
+    if len(files) == 0:
+        print("Error: No blog files found. Exiting.")
+        sys.exit(1)
+    files.sort(key=lambda x: x.name)
+
+    # Add a feed entry for each file
+    for file in files:
+        print(f"Parsing {file.name}...")
+        fe = fg.add_entry()
+        # Blog pages are served at blog/<filename_without_ext>
+        page_name = file.stem
+        fe.link(href=f"https://microsoft.github.io/PyRIT/blog/{page_name}")
+        fe.guid(f"https://microsoft.github.io/PyRIT/blog/{page_name}")
+
+        title, description = parse_blog_markdown(file)
+        fe.title(title)
+        fe.description(description)
+
+        pub_date = extract_date_from_filename(file.name)
+        if pub_date:
+            fe.pubDate(f"{pub_date}T10:00:00Z")
+
+    # Validating the RSS feed
+    print("Validating RSS feed...")
+    first_entry = fg.entry()[-1]
+    if first_entry.title() != "Multi-Turn orchestrators":
+        print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.")
+        sys.exit(1)
+    expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component."
+    if not first_entry.description().startswith(expected_desc_start):
+        print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.")
+        sys.exit(1)
+
+    # Export the RSS feed
+    print("Exporting RSS feed...")
+    RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    rss_path = RSS_OUTPUT_DIR / "rss.xml"
+    fg.rss_file(str(rss_path), pretty=True)
+    if not rss_path.exists() or rss_path.stat().st_size == 0:
+        print("Error: RSS feed export failed. Exiting.")
+        sys.exit(1)
+
+    print("RSS feed generated and exported successfully.")
88 changes: 88 additions & 0 deletions tests/unit/build_scripts/test_check_links.py
@@ -0,0 +1,88 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pathlib import Path

from build_scripts.check_links import extract_urls, resolve_relative_url, strip_fragment


def test_strip_fragment_removes_fragment() -> None:
    assert strip_fragment("https://example.com/page#section") == "https://example.com/page"


def test_strip_fragment_no_fragment_unchanged() -> None:
    assert strip_fragment("https://example.com/page") == "https://example.com/page"


def test_strip_fragment_empty_fragment() -> None:
    assert strip_fragment("https://example.com/page#") == "https://example.com/page"


def test_strip_fragment_preserves_query_string() -> None:
    result = strip_fragment("https://example.com/page?q=1#section")
    assert "q=1" in result
    assert "section" not in result


def test_resolve_relative_url_http_url_unchanged() -> None:
    url = "https://example.com"
    assert resolve_relative_url("/some/file.md", url) == url


def test_resolve_relative_url_mailto_unchanged() -> None:
    url = "mailto:test@example.com"
    assert resolve_relative_url("/some/file.md", url) == url


def test_resolve_relative_url_resolved(tmp_path: Path) -> None:
    base = str(tmp_path / "docs" / "file.md")
    target = tmp_path / "docs" / "other.md"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("# Other")
    result = resolve_relative_url(base, "other.md")
    assert result == str(target)


def test_resolve_relative_url_with_md_extension(tmp_path: Path) -> None:
    base = str(tmp_path / "docs" / "file.md")
    target = tmp_path / "docs" / "other.md"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("# Other")
    result = resolve_relative_url(base, "other")
    assert result.endswith(".md")


def test_extract_urls_extracts_markdown_links(tmp_path: Path) -> None:
    f = tmp_path / "test.md"
    f.write_text("[Click here](https://example.com)")
    urls = extract_urls(str(f))
    assert "https://example.com" in urls


def test_extract_urls_extracts_href_links(tmp_path: Path) -> None:
    f = tmp_path / "test.html"
    f.write_text('<a href="https://example.com">link</a>')
    urls = extract_urls(str(f))
    assert "https://example.com" in urls


def test_extract_urls_extracts_src_links(tmp_path: Path) -> None:
    f = tmp_path / "test.html"
    f.write_text('<img src="https://example.com/image.png">')
    urls = extract_urls(str(f))
    assert "https://example.com/image.png" in urls


def test_extract_urls_empty_file_returns_no_urls(tmp_path: Path) -> None:
    f = tmp_path / "empty.md"
    f.write_text("")
    urls = extract_urls(str(f))
    assert urls == []


def test_extract_urls_strips_fragments(tmp_path: Path) -> None:
    f = tmp_path / "test.md"
    f.write_text("[link](https://example.com/page#section)")
    urls = extract_urls(str(f))
    assert "https://example.com/page" in urls
    assert not any("#section" in u for u in urls)
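These tests pin down the contract of the three helpers, but the implementations in build_scripts/check_links.py are not part of this diff. A minimal sketch consistent with the assertions above — the use of urllib.parse, os.path, and this particular regex is an assumption, not necessarily how the module is actually written:

import os
import re
from urllib.parse import urldefrag


def strip_fragment(url: str) -> str:
    # Drop the #fragment (if any) while preserving the query string.
    return urldefrag(url)[0]


def resolve_relative_url(base_file: str, url: str) -> str:
    # Absolute http(s) and mailto links pass through unchanged.
    if url.startswith(("http://", "https://", "mailto:")):
        return url
    # Resolve the link relative to the file that contains it.
    resolved = os.path.normpath(os.path.join(os.path.dirname(base_file), url))
    # Extension-less targets are assumed to be markdown sources.
    if not os.path.splitext(resolved)[1]:
        resolved += ".md"
    return resolved


def extract_urls(path: str) -> list[str]:
    # Collect markdown link targets plus HTML href/src attributes,
    # stripping fragments so anchors don't produce duplicate URLs.
    with open(path, encoding="utf-8") as fh:
        text = fh.read()
    found = re.findall(r'\]\(([^)\s]+)\)|(?:href|src)="([^"]+)"', text)
    return [strip_fragment(a or b) for a, b in found]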
63 changes: 63 additions & 0 deletions tests/unit/build_scripts/test_generate_rss.py
@@ -0,0 +1,63 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pathlib import Path

from build_scripts.generate_rss import extract_date_from_filename, parse_blog_markdown


def test_extract_date_from_filename_standard_date() -> None:
    assert extract_date_from_filename("2024_12_3.md") == "2024-12-03"


def test_extract_date_from_filename_double_digit_day_and_month() -> None:
    assert extract_date_from_filename("2023_11_25.md") == "2023-11-25"


def test_extract_date_from_filename_single_digit_month() -> None:
    assert extract_date_from_filename("2024_1_15.md") == "2024-01-15"


def test_extract_date_from_filename_returns_empty_for_invalid() -> None:
    assert extract_date_from_filename("no_date_here.md") == ""


def test_extract_date_from_filename_returns_empty_for_non_numeric() -> None:
    assert extract_date_from_filename("intro.md") == ""


def test_parse_blog_markdown_extracts_title(tmp_path: Path) -> None:
    f = tmp_path / "2024_01_01.md"
    f.write_text("# My Blog Title\n\nSome description here.")
    title, _ = parse_blog_markdown(f)
    assert title == "My Blog Title"


def test_parse_blog_markdown_extracts_description(tmp_path: Path) -> None:
    f = tmp_path / "2024_01_01.md"
    f.write_text("# Title\n\nThis is the description paragraph.")
    _, desc = parse_blog_markdown(f)
    assert "This is the description paragraph." in desc


def test_parse_blog_markdown_skips_small_tag(tmp_path: Path) -> None:
    f = tmp_path / "2024_01_01.md"
    f.write_text("# Title\n\n<small>date info</small>\n\nReal description here.")
    _, desc = parse_blog_markdown(f)
    assert "small" not in desc
    assert "Real description here." in desc


def test_parse_blog_markdown_empty_title_when_no_heading(tmp_path: Path) -> None:
    f = tmp_path / "2024_01_01.md"
    f.write_text("No heading here.\n\nJust paragraphs.")
    title, _ = parse_blog_markdown(f)
    assert title == ""


def test_parse_blog_markdown_multiline_description_joined(tmp_path: Path) -> None:
    f = tmp_path / "2024_01_01.md"
    f.write_text("# Title\n\nLine one.\nLine two.")
    _, desc = parse_blog_markdown(f)
    assert "Line one." in desc
    assert "Line two." in desc
95 changes: 95 additions & 0 deletions tests/unit/build_scripts/test_prepare_package.py
@@ -0,0 +1,95 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import subprocess
from pathlib import Path
from unittest.mock import MagicMock, patch

from build_scripts.prepare_package import build_frontend, copy_frontend_to_package


def test_build_frontend_returns_false_when_npm_not_found(tmp_path: Path) -> None:
    with patch("build_scripts.prepare_package.shutil.which", return_value=None):
        result = build_frontend(tmp_path)
    assert result is False


def test_build_frontend_returns_false_when_package_json_missing(tmp_path: Path) -> None:
    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
        with patch("build_scripts.prepare_package.subprocess.run", return_value=MagicMock(stdout="10.0.0\n")):
            result = build_frontend(tmp_path)
    assert result is False


def test_build_frontend_returns_false_when_npm_install_fails(tmp_path: Path) -> None:
    (tmp_path / "package.json").write_text("{}")
    responses = [
        MagicMock(stdout="10.0.0\n"),
        subprocess.CalledProcessError(1, "npm install", output="error"),
    ]
    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
        with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses):
            result = build_frontend(tmp_path)
    assert result is False


def test_build_frontend_returns_false_when_npm_build_fails(tmp_path: Path) -> None:
    (tmp_path / "package.json").write_text("{}")
    responses = [
        MagicMock(stdout="10.0.0\n"),
        MagicMock(),
        subprocess.CalledProcessError(1, "npm run build", output="error"),
    ]
    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
        with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses):
            result = build_frontend(tmp_path)
    assert result is False


def test_build_frontend_returns_true_when_build_succeeds(tmp_path: Path) -> None:
    (tmp_path / "package.json").write_text("{}")
    responses = [
        MagicMock(stdout="10.0.0\n"),  # npm --version
        MagicMock(),  # npm install
        MagicMock(),  # npm run build
    ]
    with patch("build_scripts.prepare_package.shutil.which", return_value="/usr/bin/npm"):
        with patch("build_scripts.prepare_package.subprocess.run", side_effect=responses) as mock_run:
            result = build_frontend(tmp_path)
    assert result is True
    assert mock_run.call_count == 3


def test_copy_frontend_returns_false_when_dist_missing(tmp_path: Path) -> None:
    result = copy_frontend_to_package(tmp_path / "dist", tmp_path / "out")
    assert result is False


def test_copy_frontend_returns_false_when_index_html_missing(tmp_path: Path) -> None:
    dist = tmp_path / "dist"
    dist.mkdir()
    (dist / "main.js").write_text("console.log('hi')")
    result = copy_frontend_to_package(dist, tmp_path / "out")
    assert result is False


def test_copy_frontend_returns_true_when_copy_succeeds(tmp_path: Path) -> None:
    dist = tmp_path / "dist"
    dist.mkdir()
    (dist / "index.html").write_text("<html></html>")
    out = tmp_path / "out"
    result = copy_frontend_to_package(dist, out)
    assert result is True
    assert (out / "index.html").exists()


def test_copy_frontend_removes_existing_output_dir(tmp_path: Path) -> None:
    dist = tmp_path / "dist"
    dist.mkdir()
    (dist / "index.html").write_text("<html></html>")
    out = tmp_path / "out"
    out.mkdir()
    (out / "old_file.txt").write_text("old")
    copy_frontend_to_package(dist, out)
    assert not (out / "old_file.txt").exists()
    assert (out / "index.html").exists()
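The mock side effects above encode the call sequence build_frontend is expected to make: npm --version, then npm install, then npm run build, with package.json checked after the version probe. A sketch consistent with that contract — the exact command arguments and error handling are assumptions, since prepare_package.py itself is not part of this diff:

import shutil
import subprocess
from pathlib import Path


def build_frontend(frontend_dir: Path) -> bool:
    # No npm on PATH means no build.
    if shutil.which("npm") is None:
        return False
    try:
        # First subprocess.run call: probe the npm version.
        subprocess.run(["npm", "--version"], check=True, capture_output=True, text=True)
        if not (frontend_dir / "package.json").exists():
            return False
        # Second and third calls: install dependencies, then build.
        subprocess.run(["npm", "install"], cwd=frontend_dir, check=True)
        subprocess.run(["npm", "run", "build"], cwd=frontend_dir, check=True)
    except subprocess.CalledProcessError:
        return False
    return True


def copy_frontend_to_package(dist_dir: Path, out_dir: Path) -> bool:
    # A usable build must contain index.html.
    if not (dist_dir / "index.html").exists():
        return False
    # Replace any stale output directory wholesale.
    if out_dir.exists():
        shutil.rmtree(out_dir)
    shutil.copytree(dist_dir, out_dir)
    return True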