diff --git a/Makefile b/Makefile
index fbfe3092..74d13956 100644
--- a/Makefile
+++ b/Makefile
@@ -46,6 +46,10 @@ build: setup ## Build the specified module
 build-dev: setup ## Build module with development tools
 	$(WRAPPER) mkosi --force --image-id $(IMAGE)-dev --profile=devtools --include=images/$(IMAGE).conf
 
+# Build module with devtools and benchmark profiles
+build-bench: setup ## Build module with development and benchmark tools
+	$(WRAPPER) mkosi --force --image-id $(IMAGE)-bench --profile=devtools,benchmark --include=images/$(IMAGE).conf
+
 ##@ Utilities
 
 measure: ## Export TDX measurements for the built EFI file
diff --git a/README.md b/README.md
index 398917ee..f4dae735 100644
--- a/README.md
+++ b/README.md
@@ -161,6 +161,19 @@ try to disable apparmor's restriction:
 
 - If you encounter `bootctl: unrecognized option '--root=/buildroot'`, you'll need to upgrade to a newer version of systemd (at least v250), which is only supported by recent versions of Ubuntu/Debian.
 
+## Benchmarking
+
+The `benchmark` mkosi profile adds benchmarking tools to any image. See [mkosi.profiles/benchmark/mkosi.conf](mkosi.profiles/benchmark/mkosi.conf) for installed packages.
+
+```bash
+# Build with benchmark profile
+make build-bench IMAGE=flashbox-l1
+```
+
+There are two benchmark suites that can be run separately or together: one for kernel-level overhead (useful for measuring impact of kernel config changes) and one for application-level performance (CPU, disk I/O, network, entropy). Run `bench-all.sh` inside the VM to execute both.
+
+Before benchmarking, a preflight check detects resource-heavy services (e.g. lighthouse, searcher) and offers to stop them so they don't skew results. A short warmup pass runs first to burn off cold-start spikes.
+
 ## 📖 Documentation
 
 - [Development Guide](DEVELOPMENT.md) - Comprehensive guide for creating new modules and extending existing ones
diff --git a/mkosi.profiles/benchmark/mkosi.conf b/mkosi.profiles/benchmark/mkosi.conf
new file mode 100644
index 00000000..ecd7dd47
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.conf
@@ -0,0 +1,13 @@
+[Content]
+ExtraTrees=mkosi.extra
+
+Packages=sysbench
+         fio
+         libaio-dev
+         stress-ng
+         rt-tests
+         linux-perf
+         openssl
+         iperf3
+         netcat-openbsd
+         iputils-ping
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-all.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-all.sh
new file mode 100755
index 00000000..e70ffe1c
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-all.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Wrapper: runs both kernel-level and application-level benchmarks.
+# Usage: bench-all.sh [ITERATIONS]
+set -euo pipefail
+
+export ITERATIONS="${1:-3}"
+
+bench-preflight.sh
+bench-warmup.sh
+
+echo "=== Running kernel-level benchmarks ==="
+bench-kernel.sh
+
+echo ""
+echo "=== Running application-level benchmarks ==="
+bench-app.sh
+
+echo ""
+echo "=== Done ==="
+echo "Results in: kernel_benchmark_report.txt, benchmark_report.txt"
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-app.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-app.sh
new file mode 100755
index 00000000..68103dc0
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-app.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+# bench-app.sh — Application-level benchmarks for kernel performance regression testing.
+#
+# Disk I/O tests use psync/mmap engines (CONFIG_AIO and CONFIG_IO_URING are
+# disabled per KSPP hardening). Test profiles reflect Ethereum node workloads:
+#   - 4K random mixed 75R/25W (chain-following steady state)
+#   - 4K random write + fsync (MDBX commit path)
+#   - Buffered/mmap 4K reads (memory-mapped database access)
+#   - Sequential 1M r/w (bulk data, compaction, snapshots)
+#
+# References:
+#   fio I/O engines (psync, libaio, io_uring):
+#     https://fio.readthedocs.io/en/latest/fio_doc.html
+#   MDBX I/O model (mmap reads, fdatasync commits):
+#     https://libmdbx.dqdkfa.ru/intro.html
+#     https://libmdbx.dqdkfa.ru/group__sync__modes.html
+#   Intel TDX performance benchmarking (MLC, fio, iperf3):
+#     https://www.intel.com/content/www/us/en/developer/articles/technical/tdx-performance-analysis-reference-documentation.html
+#   Ethereum node disk requirements:
+#     https://docs.nethermind.io/get-started/system-requirements/
+#     https://geth.ethereum.org/docs/getting-started/hardware-requirements
+#     https://reth.rs/run/system-requirements/
+#
+# Environment variables:
+#   ITERATIONS   — benchmark iterations (default: 1)
+#   TESTDIR      — fio working directory (default: /persistent/fio-tmp)
+#   SIZE         — fio test file size (default: 2G)
+#   FIO_RUNTIME  — seconds per fio test (default: 30)
+#   IPERF_SERVER — iperf3 server; skipped if unset
+#   IPERF_PORT   — iperf3 port (default: 5201)
+set -uo pipefail
+
+ITERATIONS="${ITERATIONS:-1}"
+REPORT="benchmark_report.txt"
+
+TESTDIR="${TESTDIR:-/persistent/fio-tmp}"
+TESTFILE=""  # set after mkdir
+SIZE="${SIZE:-2G}"
+FIO_RUNTIME="${FIO_RUNTIME:-30}"
+
+IPERF_PORT="${IPERF_PORT:-5201}"
+
+mkdir -p "$TESTDIR"
+chmod 700 "$TESTDIR"
+TESTFILE="$TESTDIR/fio.test"
+
+: > "$REPORT"
+
+# Run a benchmark command, log output, continue on failure.
+run_bench() {
+  local label="$1"; shift
+  echo "=== $label ===" | tee -a "$REPORT"
+  if "$@" 2>&1 | tee -a "$REPORT"; then
+    :
+  else
+    echo "*** FAILED (exit $?): $label ***" | tee -a "$REPORT"
+  fi
+  echo "" | tee -a "$REPORT"
+}
+
+# Common fio arguments
+fio_common="--time_based --runtime=${FIO_RUNTIME} --group_reporting --thread"
+
+for i in $(seq 1 "$ITERATIONS"); do
+  echo "========================================" | tee -a "$REPORT"
+  echo "=== ITERATION $i/$ITERATIONS ===" | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── CPU ──────────────────────────────────────────────────────────────
+  run_bench "CPU: sysbench prime" \
+    sysbench cpu --cpu-max-prime=50000 --time=30 --threads=1 run
+
+  run_bench "CPU: openssl speed" \
+    openssl speed --seconds 10 aes-256-cbc rsa2048 sha256
+
+  # ── Memory ───────────────────────────────────────────────────────────
+  run_bench "MEMORY: sysbench random write" \
+    sysbench memory --memory-total-size=256G --memory-block-size=1Kb \
+    --memory-oper=write --memory-access-mode=rnd --threads=1 run
+
+  # ── Disk I/O ─────────────────────────────────────────────────────────
+  echo "=== DISK I/O (ioengine=psync, direct=1 unless noted) ===" | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # Sequential throughput — bulk data: WAL, compaction, snapshots
+  run_bench "DISK: sequential write throughput (1M, 4 jobs)" \
+    fio --name=seq-write $fio_common \
+    --ioengine=psync --direct=1 --rw=write --bs=1M \
+    --numjobs=4 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  run_bench "DISK: sequential read throughput (1M, 4 jobs)" \
+    fio --name=seq-read $fio_common \
+    --ioengine=psync --direct=1 --rw=read --bs=1M \
+    --numjobs=4 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # Random 4K IOPS — database state access (dominant I/O pattern)
+  run_bench "DISK: random 4K read IOPS (16 jobs)" \
+    fio --name=rand-read $fio_common \
+    --ioengine=psync --direct=1 --rw=randread --bs=4k \
+    --numjobs=16 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  run_bench "DISK: random 4K write IOPS (16 jobs)" \
+    fio --name=rand-write $fio_common \
+    --ioengine=psync --direct=1 --rw=randwrite --bs=4k \
+    --numjobs=16 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # Mixed 4K 75R/25W — Ethereum node steady-state profile
+  # Reference: Nethermind requires ≥10K IOPS (r/w); all clients require NVMe SSD
+  run_bench "DISK: random 4K mixed 75R/25W (16 jobs) — steady state" \
+    fio --name=rand-mixed $fio_common \
+    --ioengine=psync --direct=1 --rw=randrw --rwmixread=75 --bs=4k \
+    --numjobs=16 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # fsync-per-write — measures full commit cycle (write + fdatasync)
+  # Key metric: "sync" percentiles in output, not "clat"
+  run_bench "DISK: random 4K write + fsync (1 job) — commit latency" \
+    fio --name=rand-fsync $fio_common \
+    --ioengine=psync --direct=1 --rw=randwrite --bs=4k \
+    --numjobs=1 --size="$SIZE" --fsync=1 \
+    --filename="$TESTFILE"
+
+  # Buffered mmap reads — page-cache path used by memory-mapped databases
+  # Validate: output should show major page faults (majf); if zero, data was cached
+  run_bench "DISK: buffered random 4K read (mmap, 4 jobs) — page-cache path" \
+    fio --name=mmap-read $fio_common \
+    --ioengine=mmap --direct=0 --rw=randread --bs=4k \
+    --numjobs=4 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # ── Network ──────────────────────────────────────────────────────────
+  if [[ -n "${IPERF_SERVER:-}" ]]; then
+    run_bench "NETWORK: iperf3 upload (VM → host)" \
+      iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 30
+    run_bench "NETWORK: iperf3 download (host → VM)" \
+      iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 30 -R
+    run_bench "NETWORK: ping latency (100 packets)" \
+      ping -c 100 -i 0.2 -W 1 "$IPERF_SERVER"
+  else
+    echo "=== NETWORK: skipped (set IPERF_SERVER to enable) ===" | tee -a "$REPORT"
+    echo "" | tee -a "$REPORT"
+  fi
+
+  # ── Entropy ──────────────────────────────────────────────────────────
+  # TDX attestation and key generation depend on RDRAND/entropy throughput.
+  run_bench "ENTROPY: /dev/urandom throughput (256 MB)" \
+    dd if=/dev/urandom of=/dev/null bs=1M count=256 iflag=fullblock
+
+  # ── Stress ───────────────────────────────────────────────────────────
+  run_bench "STRESS: combined (4 cpu, 2 io, 2 vm)" \
+    stress-ng --cpu 4 --io 2 --vm 2 --vm-bytes 1G --timeout 30s --metrics-brief
+
+  # Clean up test file between iterations
+  rm -f "$TESTFILE"
+
+done
+
+rm -rf "$TESTDIR"
+echo "========================================" | tee -a "$REPORT"
+echo "Report saved to $REPORT"
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-kernel.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-kernel.sh
new file mode 100755
index 00000000..d36f6747
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-kernel.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# bench-kernel.sh — Kernel-level benchmarks for hardening overhead measurement.
+#
+# Isolates kernel-level costs from hardening options (ASLR, FORTIFY_SOURCE,
+# INIT_ON_ALLOC, RANDSTRUCT, SLAB hardening, etc.) by testing syscall latency,
+# scheduler throughput, memory allocation, IPC, and scheduling jitter.
+# Compare results between hardened and baseline kernel configs.
+#
+# Environment variables:
+#   ITERATIONS — number of benchmark iterations (default: 3)
+set -euo pipefail
+
+ITERATIONS="${ITERATIONS:-3}"
+REPORT="kernel_benchmark_report.txt"
+
+: > "$REPORT"
+
+echo "=== System Info ===" | tee -a "$REPORT"
+uname -r | tee -a "$REPORT"
+date -u | tee -a "$REPORT"
+echo "" | tee -a "$REPORT"
+
+for i in $(seq 1 "$ITERATIONS"); do
+  echo "========================================" | tee -a "$REPORT"
+  echo "=== ITERATION $i/$ITERATIONS ===" | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Syscall & context-switch latency ──────────────────────────────
+  echo "=== PERF BENCH: sched pipe (syscall + context-switch latency) ===" | tee -a "$REPORT"
+  perf bench sched pipe -l 1000000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== PERF BENCH: sched messaging (scheduler throughput, 20 groups) ===" | tee -a "$REPORT"
+  perf bench sched messaging -g 20 -l 1000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Memory bandwidth & latency ────────────────────────────────────
+  echo "=== PERF BENCH: mem memcpy (1 GB, FORTIFY_SOURCE overhead) ===" | tee -a "$REPORT"
+  perf bench mem memcpy -s 1GB -l 5 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== PERF BENCH: mem memset (1 GB, INIT_ON_ALLOC zeroing overhead) ===" | tee -a "$REPORT"
+  perf bench mem memset -s 1GB -l 5 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── IPC & scheduler saturation ────────────────────────────────────
+  echo "=== HACKBENCH: pipes + threads ===" | tee -a "$REPORT"
+  hackbench --pipe --threads -l 1000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== HACKBENCH: sockets + processes ===" | tee -a "$REPORT"
+  hackbench -l 1000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Targeted stressors (30s each) ────────────────────────────────
+  echo "=== STRESS-NG: syscall overhead ===" | tee -a "$REPORT"
+  stress-ng --syscall 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: malloc (INIT_ON_ALLOC / SLAB hardening overhead) ===" | tee -a "$REPORT"
+  stress-ng --malloc 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: fork (ASLR / stack canaries / RANDSTRUCT overhead) ===" | tee -a "$REPORT"
+  stress-ng --fork 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: context switch ===" | tee -a "$REPORT"
+  stress-ng --switch 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: pipe (IPC throughput) ===" | tee -a "$REPORT"
+  stress-ng --pipe 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Scheduling latency ───────────────────────────────────────────
+  echo "=== CYCLICTEST: scheduling latency (30s, 1000µs interval) ===" | tee -a "$REPORT"
+  cyclictest --mlockall -p80 -t1 -i1000 -l30000 -q 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+done
+
+echo "Report saved to $REPORT"
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-preflight.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-preflight.sh
new file mode 100755
index 00000000..72ec4b65
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-preflight.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# Pre-flight check: detect and optionally stop application services
+# that would skew benchmark results.
+#
+# Usage: bench-preflight.sh
+#
+# Environment variables:
+#   BENCH_STOP_SERVICES=1   stop detected services automatically
+#
+# Interactive (TTY detected): prompts user to stop services
+# Non-interactive (no TTY): stops only if BENCH_STOP_SERVICES=1, otherwise warns
+set -euo pipefail
+
+# ── iperf3 firewall check ─────────────────────────────────────────────────────
+# Only runs when IPERF_SERVER is set (same env var bench-app.sh uses).
+# Uses nc with a short timeout — if the port is unreachable, the OUTPUT chain
+# is likely blocking it (the VM firewall defaults to DROP).
+if [[ -n "${IPERF_SERVER:-}" ]]; then
+  _iperf_port="${IPERF_PORT:-5201}"
+  if ! nc -z -w2 "$IPERF_SERVER" "$_iperf_port" 2>/dev/null; then
+    echo "=== iperf3 firewall check ==="
+    echo "Cannot reach iperf3 server at $IPERF_SERVER:$_iperf_port"
+    echo "The VM firewall (default OUTPUT DROP) is likely blocking TCP port $_iperf_port."
+    echo ""
+
+    _should_unblock=false
+    if [[ "${BENCH_UNBLOCK_IPERF:-0}" == "1" ]]; then
+      _should_unblock=true
+    elif [[ -t 0 ]]; then
+      read -rp "Add iptables rules to allow iperf3 on port $_iperf_port? [Y/n] " _reply
+      if [[ -z "$_reply" || "$_reply" =~ ^[Yy] ]]; then
+        _should_unblock=true
+      fi
+    else
+      echo "WARNING: iperf3 test will likely fail — set BENCH_UNBLOCK_IPERF=1 to unblock automatically."
+    fi
+
+    if $_should_unblock; then
+      iptables -I OUTPUT 1 -p tcp --dport "$_iperf_port" -m comment --comment "bench-iperf3" -j ACCEPT
+      iptables -I INPUT 1 -p tcp --sport "$_iperf_port" -m comment --comment "bench-iperf3" -j ACCEPT
+      iptables -I OUTPUT 1 -p icmp -m comment --comment "bench-iperf3" -j ACCEPT
+      iptables -I INPUT 1 -p icmp -m comment --comment "bench-iperf3" -j ACCEPT
+      echo "iptables rules added — port $_iperf_port and ICMP unblocked (comment: bench-iperf3)."
+    fi
+    echo ""
+  fi
+fi
+
+APP_SERVICES=(
+  lighthouse
+  searcher-container
+  cvm-reverse-proxy
+  ssh-pubkey-server
+  input-only-proxy
+  delay-pipe
+)
+
+running=()
+for svc in "${APP_SERVICES[@]}"; do
+  if systemctl is-active --quiet "$svc" 2>/dev/null; then
+    running+=("$svc")
+  fi
+done
+
+if [[ ${#running[@]} -eq 0 ]]; then
+  echo "Pre-flight OK: no application services running."
+  exit 0
+fi
+
+echo "=== Pre-flight check ==="
+echo "Running application services that may skew results:"
+for svc in "${running[@]}"; do
+  pid=$(systemctl show -p MainPID --value "$svc" 2>/dev/null)
+  if [[ -n "$pid" && "$pid" != "0" ]]; then
+    cpu=$(ps -p "$pid" -o %cpu= 2>/dev/null || echo "?")
+    mem=$(ps -p "$pid" -o %mem= 2>/dev/null || echo "?")
+    printf "  %-30s CPU: %s%%  MEM: %s%%\n" "$svc" "${cpu// /}" "${mem// /}"
+  else
+    printf "  %-30s (no main PID)\n" "$svc"
+  fi
+done
+echo ""
+
+if [[ "${BENCH_STOP_SERVICES:-0}" == "1" ]]; then
+  should_stop=true
+elif [[ -t 0 ]]; then
+  read -rp "Stop these services before benchmarking? [Y/n] " reply
+  if [[ -z "$reply" || "$reply" =~ ^[Yy] ]]; then
+    should_stop=true
+  else
+    should_stop=false
+  fi
+else
+  echo "WARNING: benchmarking with application services active — results may be noisy."
+  echo "Set BENCH_STOP_SERVICES=1 to stop them automatically."
+  should_stop=false
+fi
+
+if $should_stop; then
+  for svc in "${running[@]}"; do
+    echo "Stopping $svc..."
+    systemctl stop "$svc"
+  done
+  echo "All application services stopped."
+else
+  echo "WARNING: benchmarking with application services active — results may be noisy."
+fi
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-warmup.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-warmup.sh
new file mode 100755
index 00000000..6ee8f552
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-warmup.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Warmup: prime subsystems before benchmark iterations to avoid cold-start skew.
+#
+# Currently warms up:
+#   - iperf3: short run to establish SLIRP buffers and TCP state
+#   - stress-ng syscall
+#
+# Add future warmup steps here as needed.
+#
+# Environment variables:
+#   IPERF_SERVER — iperf3 server address (same as bench-app.sh); warmup skipped if unset
+#   IPERF_PORT   — iperf3 port (default: 5201)
set -euo pipefail
+
+IPERF_PORT="${IPERF_PORT:-5201}"
+
+# ── iperf3 ────────────────────────────────────────────────────────────────────
+if [[ -n "${IPERF_SERVER:-}" ]]; then
+  echo "Warming up iperf3 (5s, discarded)..."
+  iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 > /dev/null 2>&1 || true
+fi
+
+# ── stress-ng syscall ─────────────────────────────────────────────────────────
+# Iter 1 shows a 20x cold-boot spike (370 vs stable 17 ops/s). Running once
+# here consumes the anomaly so all measured iterations land at the stable value.
+echo "Warming up stress-ng syscall (5s, discarded)..."
+stress-ng --syscall 1 --timeout 5 > /dev/null 2>&1 || true
+
+echo "Warmup done."