diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2986a0c..264e27a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.11 + rev: v0.15.1 hooks: # Run the linter - id: ruff-check diff --git a/README.md b/README.md index 28015fb..2fdcd94 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Rosettes is a syntax highlighter for Python 3.14t. Hand-written state machines, - **O(n) guaranteed** — Hand-written state machines, no regex backtracking - **Zero ReDoS** — No exploitable patterns, safe for untrusted input -- **Thread-safe** — Immutable state, optimized for Python 3.14t free-threading +- **Free-threading native** — All lexer state is local variables, keyword tables are `frozenset`, tokens are immutable. Highlight from any number of threads with zero contention. - **Pygments compatible** — Drop-in CSS class compatibility for existing themes - **55 languages** — Python, JavaScript, Rust, Go, and 51 more @@ -228,7 +228,15 @@ On a 10,000-line Python file: - **Tokenize** — ~12ms - **Highlight** — ~18ms -- **Parallel (8 blocks)** — ~22ms (1.5–2x speedup on Python 3.14t free-threading) +- **Parallel highlighting** — Run `python benchmarks/benchmark_parallel.py` to see scaling on your machine. Example with 100 code blocks on 8-core: + + ``` + Threads Time Speedup + 1 0.04s 1.00x + 2 0.02s 1.61x + 4 0.02s 2.53x + 8 0.02s 2.10x + ``` --- @@ -255,6 +263,12 @@ uv sync --group dev pytest ``` +**Run parallel benchmark** (free-threading scaling demo): + +```bash +python benchmarks/benchmark_parallel.py +``` + --- ## The Bengal Ecosystem diff --git a/benchmarks/benchmark_parallel.py b/benchmarks/benchmark_parallel.py new file mode 100644 index 0000000..58d7d18 --- /dev/null +++ b/benchmarks/benchmark_parallel.py @@ -0,0 +1,114 @@ +"""Parallel highlighting benchmark for free-threading visibility. 
# Run with:
#     python benchmarks/benchmark_parallel.py
#     python -m benchmarks.benchmark_parallel
#
# Demonstrates thread scaling when highlighting many code blocks in parallel
# under Python 3.14t free-threading. Uses highlight_many() for parallel path
# and sequential highlight() loop for baseline. Stdlib only.

import sys
import time
from pathlib import Path


def _load_sample_code() -> None:
    """Put the repository root on ``sys.path`` (for script execution).

    The root is the parent of the directory that holds this file. Nothing is
    imported here; the call only makes the ``benchmarks`` package resolvable
    for the import performed inside :func:`make_blocks`.
    """
    repo_root = str(Path(__file__).resolve().parent.parent)
    if repo_root not in sys.path:
        sys.path.insert(0, repo_root)


def _require_positive_iterations(iterations: int) -> None:
    """Reject iteration counts that would leave no timing samples to average."""
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")


def make_blocks(n: int) -> list[tuple[str, str]]:
    """Create n code blocks with varied languages (Python, JS, Rust, Go).

    Args:
        n: Number of blocks to produce.

    Returns:
        ``n`` ``(code, language)`` pairs obtained by cycling through the five
        sample templates in a fixed order. ``n <= 0`` yields an empty list.
    """
    _load_sample_code()
    from benchmarks.sample_code import (
        GO_CODE,
        JAVASCRIPT_CODE,
        PYTHON_MEDIUM,
        PYTHON_SIMPLE,
        RUST_CODE,
    )

    templates = [
        (PYTHON_SIMPLE, "python"),
        (PYTHON_MEDIUM, "python"),
        (RUST_CODE, "rust"),
        (JAVASCRIPT_CODE, "javascript"),
        (GO_CODE, "go"),
    ]
    return [templates[i % len(templates)] for i in range(n)]


def run_sequential(blocks: list[tuple[str, str]], iterations: int = 5) -> float:
    """Highlight blocks sequentially with highlight().

    Args:
        blocks: ``(code, language)`` pairs to highlight one after another.
        iterations: Number of timed passes over ``blocks``; must be >= 1.

    Returns:
        Mean elapsed seconds per pass, measured with ``time.perf_counter``.

    Raises:
        ValueError: If ``iterations`` is less than 1 (there would be no
            samples to average).
    """
    # Validate before importing so a bad argument fails fast and clearly.
    _require_positive_iterations(iterations)
    from rosettes import highlight

    times: list[float] = []
    for _ in range(iterations):
        start = time.perf_counter()
        for code, lang in blocks:
            highlight(code, lang)
        times.append(time.perf_counter() - start)
    return sum(times) / len(times)


def run_parallel(blocks: list[tuple[str, str]], max_workers: int, iterations: int = 5) -> float:
    """Highlight blocks in parallel with highlight_many().

    Args:
        blocks: ``(code, language)`` pairs handed to ``highlight_many`` as is.
        max_workers: Forwarded to ``highlight_many(max_workers=...)``.
        iterations: Number of timed ``highlight_many`` calls; must be >= 1.

    Returns:
        Mean elapsed seconds per call, measured with ``time.perf_counter``.

    Raises:
        ValueError: If ``iterations`` is less than 1 (there would be no
            samples to average).
    """
    # Validate before importing so a bad argument fails fast and clearly.
    _require_positive_iterations(iterations)
    from rosettes import highlight_many

    times: list[float] = []
    for _ in range(iterations):
        start = time.perf_counter()
        highlight_many(blocks, max_workers=max_workers)
        times.append(time.perf_counter() - start)
    return sum(times) / len(times)


def main() -> None:
    """Run parallel highlighting benchmark and print results."""
    # GIL detection: interpreters that lack sys._is_gil_enabled are treated
    # as GIL-enabled.
    gil_enabled = getattr(sys, "_is_gil_enabled", lambda: True)()
    if gil_enabled:
        print("Free-threaded build: No (GIL enabled)")
        print("\nRun with Python 3.14t (free-threading) to see parallel scaling.")
        print("Example: python3.14t benchmarks/benchmark_parallel.py")
    else:
        version = sys.version.split()[0]
        print(f"Free-threaded build: Yes ({version})")

    n_blocks = 100
    blocks = make_blocks(n_blocks)
    print(f"\nParallel highlighting benchmark: {n_blocks} code blocks\n")

    # Warmup. The import stays after make_blocks(), which adjusts sys.path.
    from rosettes import highlight

    for code, lang in blocks[:5]:
        highlight(code, lang)

    # Benchmark: 1 thread = sequential, 2/4/8 = highlight_many with max_workers
    results: list[tuple[int, float, float]] = []
    baseline_time: float | None = None

    for num_threads in (1, 2, 4, 8):
        if num_threads == 1:
            elapsed = run_sequential(blocks)
        else:
            elapsed = run_parallel(blocks, max_workers=num_threads)
        # The first (sequential) measurement is the reference for every speedup.
        if baseline_time is None:
            baseline_time = elapsed
        results.append((num_threads, elapsed, baseline_time / elapsed))

    # Print table
    print(" Threads Time Speedup")
    for num_threads, elapsed, speedup in results:
        print(f" {num_threads:<10} {elapsed:.2f}s {speedup:.2f}x")
    print()


if __name__ == "__main__":
    main()
"ty>=0.0.11", # Astral type checker (Rust-based) - "ruff>=0.14.0", # 0.14+ required for Python 3.14 support + "ruff>=0.15.1", # 0.15.1+ fixes except-parenthesis bug (PEP 758 + as clause) "pre-commit>=4.0.0", # For git hooks "hypothesis>=6.100.0", # Property-based testing ]