diff --git a/Makefile b/Makefile
index fbfe3092..74d13956 100644
--- a/Makefile
+++ b/Makefile
@@ -46,6 +46,10 @@ build: setup ## Build the specified module
 build-dev: setup ## Build module with development tools
 	$(WRAPPER) mkosi --force --image-id $(IMAGE)-dev --profile=devtools --include=images/$(IMAGE).conf
 
+# Build module with devtools and benchmark profiles
+build-bench: setup ## Build module with development and benchmark tools
+	$(WRAPPER) mkosi --force --image-id $(IMAGE)-bench --profile=devtools,benchmark --include=images/$(IMAGE).conf
+
 ##@ Utilities
 
 measure: ## Export TDX measurements for the built EFI file
diff --git a/README.md b/README.md
index 398917ee..f4dae735 100644
--- a/README.md
+++ b/README.md
@@ -161,6 +161,19 @@ try to disable apparmor's restriction:
 
 - If you encounter `bootctl: unrecognized option '--root=/buildroot'`, you'll need to upgrade to a newer version of systemd (at least v250), which is only supported by recent versions of Ubuntu/Debian.
 
+## Benchmarking
+
+The `benchmark` mkosi profile adds benchmarking tools to any image. See [mkosi.profiles/benchmark/mkosi.conf](mkosi.profiles/benchmark/mkosi.conf) for installed packages.
+
+```bash
+# Build with benchmark profile
+make build-bench IMAGE=flashbox-l1
+```
+
+There are two benchmark suites that can be run separately or together: one for kernel-level overhead (useful for measuring impact of kernel config changes) and one for application-level performance (CPU, disk I/O, network, entropy). Run `bench-all.sh` inside the VM to execute both.
+
+Before benchmarking, a preflight check detects resource-heavy services (e.g. lighthouse, searcher) and offers to stop them so they don't skew results. A short warmup pass runs first to burn off cold-start spikes.
+
 ## 📖 Documentation
 
 - [Development Guide](DEVELOPMENT.md) - Comprehensive guide for creating new modules and extending existing ones
diff --git a/mkosi.profiles/benchmark/mkosi.conf b/mkosi.profiles/benchmark/mkosi.conf
new file mode 100644
index 00000000..ecd7dd47
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.conf
@@ -0,0 +1,13 @@
+[Content]
+ExtraTrees=mkosi.extra
+
+Packages=sysbench
+         fio
+         libaio-dev
+         stress-ng
+         rt-tests
+         linux-perf
+         openssl
+         iperf3
+         netcat-openbsd
+         iputils-ping
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-all.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-all.sh
new file mode 100755
index 00000000..e70ffe1c
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-all.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Wrapper: runs both kernel-level and application-level benchmarks.
+# Usage: bench-all.sh [ITERATIONS]
+set -euo pipefail
+
+export ITERATIONS="${1:-3}"
+
+bench-preflight.sh
+bench-warmup.sh
+
+echo "=== Running kernel-level benchmarks ==="
+bench-kernel.sh
+
+echo ""
+echo "=== Running application-level benchmarks ==="
+bench-app.sh
+
+echo ""
+echo "=== Done ==="
+echo "Results in: kernel_benchmark_report.txt, benchmark_report.txt"
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-app.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-app.sh
new file mode 100755
index 00000000..68103dc0
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-app.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+# bench-app.sh — Application-level benchmarks for kernel performance regression testing.
+#
+# Disk I/O tests use psync/mmap engines (CONFIG_AIO and CONFIG_IO_URING are
+# disabled per KSPP hardening). Test profiles reflect Ethereum node workloads:
+#   - 4K random mixed 75R/25W (chain-following steady state)
+#   - 4K random write + fsync (MDBX commit path)
+#   - Buffered/mmap 4K reads (memory-mapped database access)
+#   - Sequential 1M r/w (bulk data, compaction, snapshots)
+#
+# References:
+#   fio I/O engines (psync, libaio, io_uring):
+#     https://fio.readthedocs.io/en/latest/fio_doc.html
+#   MDBX I/O model (mmap reads, fdatasync commits):
+#     https://libmdbx.dqdkfa.ru/intro.html
+#     https://libmdbx.dqdkfa.ru/group__sync__modes.html
+#   Intel TDX performance benchmarking (MLC, fio, iperf3):
+#     https://www.intel.com/content/www/us/en/developer/articles/technical/tdx-performance-analysis-reference-documentation.html
+#   Ethereum node disk requirements:
+#     https://docs.nethermind.io/get-started/system-requirements/
+#     https://geth.ethereum.org/docs/getting-started/hardware-requirements
+#     https://reth.rs/run/system-requirements/
+#
+# Environment variables:
+#   ITERATIONS   — benchmark iterations (default: 1)
+#   TESTDIR      — fio working directory (default: /persistent/fio-tmp)
+#   SIZE         — fio test file size (default: 2G)
+#   FIO_RUNTIME  — seconds per fio test (default: 30)
+#   IPERF_SERVER — iperf3 server; skipped if unset
+#   IPERF_PORT   — iperf3 port (default: 5201)
+set -uo pipefail
+
+ITERATIONS="${ITERATIONS:-1}"
+REPORT="benchmark_report.txt"
+
+TESTDIR="${TESTDIR:-/persistent/fio-tmp}"
+TESTFILE=""  # set after mkdir
+SIZE="${SIZE:-2G}"
+FIO_RUNTIME="${FIO_RUNTIME:-30}"
+
+IPERF_PORT="${IPERF_PORT:-5201}"
+
+mkdir -p "$TESTDIR"
+chmod 700 "$TESTDIR"
+TESTFILE="$TESTDIR/fio.test"
+
+: > "$REPORT"
+
+# Run a benchmark command, log output, continue on failure.
+run_bench() {
+  local label="$1"; shift
+  echo "=== $label ===" | tee -a "$REPORT"
+  if "$@" 2>&1 | tee -a "$REPORT"; then
+    :
+  else
+    echo "*** FAILED (exit $?): $label ***" | tee -a "$REPORT"
+  fi
+  echo "" | tee -a "$REPORT"
+}
+
+# Common fio arguments
+fio_common="--time_based --runtime=${FIO_RUNTIME} --group_reporting --thread"
+
+for i in $(seq 1 "$ITERATIONS"); do
+  echo "========================================" | tee -a "$REPORT"
+  echo "=== ITERATION $i/$ITERATIONS ===" | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── CPU ──────────────────────────────────────────────────────────────
+  run_bench "CPU: sysbench prime" \
+    sysbench cpu --cpu-max-prime=50000 --time=30 --threads=1 run
+
+  run_bench "CPU: openssl speed" \
+    openssl speed --seconds 10 aes-256-cbc rsa2048 sha256
+
+  # ── Memory ───────────────────────────────────────────────────────────
+  run_bench "MEMORY: sysbench random write" \
+    sysbench memory --memory-total-size=256G --memory-block-size=1Kb \
+    --memory-oper=write --memory-access-mode=rnd --threads=1 run
+
+  # ── Disk I/O ─────────────────────────────────────────────────────────
+  echo "=== DISK I/O (ioengine=psync, direct=1 unless noted) ===" | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # Sequential throughput — bulk data: WAL, compaction, snapshots
+  run_bench "DISK: sequential write throughput (1M, 4 jobs)" \
+    fio --name=seq-write $fio_common \
+    --ioengine=psync --direct=1 --rw=write --bs=1M \
+    --numjobs=4 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  run_bench "DISK: sequential read throughput (1M, 4 jobs)" \
+    fio --name=seq-read $fio_common \
+    --ioengine=psync --direct=1 --rw=read --bs=1M \
+    --numjobs=4 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # Random 4K IOPS — database state access (dominant I/O pattern)
+  run_bench "DISK: random 4K read IOPS (16 jobs)" \
+    fio --name=rand-read $fio_common \
+    --ioengine=psync --direct=1 --rw=randread --bs=4k \
+    --numjobs=16 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  run_bench "DISK: random 4K write IOPS (16 jobs)" \
+    fio --name=rand-write $fio_common \
+    --ioengine=psync --direct=1 --rw=randwrite --bs=4k \
+    --numjobs=16 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # Mixed 4K 75R/25W — Ethereum node steady-state profile
+  # Reference: Nethermind requires ≥10K IOPS (r/w); all clients require NVMe SSD
+  run_bench "DISK: random 4K mixed 75R/25W (16 jobs) — steady state" \
+    fio --name=rand-mixed $fio_common \
+    --ioengine=psync --direct=1 --rw=randrw --rwmixread=75 --bs=4k \
+    --numjobs=16 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # fsync-per-write — measures full commit cycle (write + fdatasync)
+  # Key metric: "sync" percentiles in output, not "clat"
+  run_bench "DISK: random 4K write + fsync (1 job) — commit latency" \
+    fio --name=rand-fsync $fio_common \
+    --ioengine=psync --direct=1 --rw=randwrite --bs=4k \
+    --numjobs=1 --size="$SIZE" --fsync=1 \
+    --filename="$TESTFILE"
+
+  # Buffered mmap reads — page-cache path used by memory-mapped databases
+  # Validate: output should show major page faults (majf); if zero, data was cached
+  run_bench "DISK: buffered random 4K read (mmap, 4 jobs) — page-cache path" \
+    fio --name=mmap-read $fio_common \
+    --ioengine=mmap --direct=0 --rw=randread --bs=4k \
+    --numjobs=4 --size="$SIZE" \
+    --filename="$TESTFILE"
+
+  # ── Network ──────────────────────────────────────────────────────────
+  if [[ -n "${IPERF_SERVER:-}" ]]; then
+    run_bench "NETWORK: iperf3 upload (VM → host)" \
+      iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 30
+    run_bench "NETWORK: iperf3 download (host → VM)" \
+      iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 30 -R
+    run_bench "NETWORK: ping latency (100 packets)" \
+      ping -c 100 -i 0.2 -W 1 "$IPERF_SERVER"
+  else
+    echo "=== NETWORK: skipped (set IPERF_SERVER to enable) ===" | tee -a "$REPORT"
+    echo "" | tee -a "$REPORT"
+  fi
+
+  # ── Entropy ──────────────────────────────────────────────────────────
+  # TDX attestation and key generation depend on RDRAND/entropy throughput.
+  run_bench "ENTROPY: /dev/urandom throughput (256 MB)" \
+    dd if=/dev/urandom of=/dev/null bs=1M count=256 iflag=fullblock
+
+  # ── Stress ───────────────────────────────────────────────────────────
+  run_bench "STRESS: combined (4 cpu, 2 io, 2 vm)" \
+    stress-ng --cpu 4 --io 2 --vm 2 --vm-bytes 1G --timeout 30s --metrics-brief
+
+  # Clean up test file between iterations
+  rm -f "$TESTFILE"
+
+done
+
+rm -rf "$TESTDIR"
+echo "========================================" | tee -a "$REPORT"
+echo "Report saved to $REPORT"
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-kernel.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-kernel.sh
new file mode 100755
index 00000000..d36f6747
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-kernel.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# bench-kernel.sh — Kernel-level benchmarks for hardening overhead measurement.
+#
+# Isolates kernel-level costs from hardening options (ASLR, FORTIFY_SOURCE,
+# INIT_ON_ALLOC, RANDSTRUCT, SLAB hardening, etc.) by testing syscall latency,
+# scheduler throughput, memory allocation, IPC, and scheduling jitter.
+# Compare results between hardened and baseline kernel configs.
+#
+# Environment variables:
+#   ITERATIONS — number of benchmark iterations (default: 3)
+set -euo pipefail
+
+ITERATIONS="${ITERATIONS:-3}"
+REPORT="kernel_benchmark_report.txt"
+
+: > "$REPORT"
+
+echo "=== System Info ===" | tee -a "$REPORT"
+uname -r | tee -a "$REPORT"
+date -u | tee -a "$REPORT"
+echo "" | tee -a "$REPORT"
+
+for i in $(seq 1 "$ITERATIONS"); do
+  echo "========================================" | tee -a "$REPORT"
+  echo "=== ITERATION $i/$ITERATIONS ===" | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Syscall & context-switch latency ──────────────────────────────
+  echo "=== PERF BENCH: sched pipe (syscall + context-switch latency) ===" | tee -a "$REPORT"
+  perf bench sched pipe -l 1000000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== PERF BENCH: sched messaging (scheduler throughput, 20 groups) ===" | tee -a "$REPORT"
+  perf bench sched messaging -g 20 -l 1000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Memory bandwidth & latency ────────────────────────────────────
+  echo "=== PERF BENCH: mem memcpy (1 GB, FORTIFY_SOURCE overhead) ===" | tee -a "$REPORT"
+  perf bench mem memcpy -s 1GB -l 5 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== PERF BENCH: mem memset (1 GB, INIT_ON_ALLOC zeroing overhead) ===" | tee -a "$REPORT"
+  perf bench mem memset -s 1GB -l 5 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── IPC & scheduler saturation ────────────────────────────────────
+  echo "=== HACKBENCH: pipes + threads ===" | tee -a "$REPORT"
+  hackbench --pipe --threads -l 1000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== HACKBENCH: sockets + processes ===" | tee -a "$REPORT"
+  hackbench -l 1000 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Targeted stressors (30s each) ────────────────────────────────
+  echo "=== STRESS-NG: syscall overhead ===" | tee -a "$REPORT"
+  stress-ng --syscall 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: malloc (INIT_ON_ALLOC / SLAB hardening overhead) ===" | tee -a "$REPORT"
+  stress-ng --malloc 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: fork (ASLR / stack canaries / RANDSTRUCT overhead) ===" | tee -a "$REPORT"
+  stress-ng --fork 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: context switch ===" | tee -a "$REPORT"
+  stress-ng --switch 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  echo "=== STRESS-NG: pipe (IPC throughput) ===" | tee -a "$REPORT"
+  stress-ng --pipe 1 --timeout 30 --metrics-brief 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+  # ── Scheduling latency ───────────────────────────────────────────
+  echo "=== CYCLICTEST: scheduling latency (30s, 1000µs interval) ===" | tee -a "$REPORT"
+  cyclictest --mlockall -p80 -t1 -i1000 -l30000 -q 2>&1 | tee -a "$REPORT"
+  echo "" | tee -a "$REPORT"
+
+done
+
+echo "Report saved to $REPORT"
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-preflight.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-preflight.sh
new file mode 100755
index 00000000..72ec4b65
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-preflight.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# Pre-flight check: detect and optionally stop application services
+# that would skew benchmark results.
+#
+# Usage: bench-preflight.sh
+#
+# Environment variables:
+#   BENCH_STOP_SERVICES=1   stop detected services automatically
+#
+# Interactive (TTY detected): prompts user to stop services
+# Non-interactive (no TTY): stops only if BENCH_STOP_SERVICES=1, otherwise warns
+set -euo pipefail
+
+# ── iperf3 firewall check ─────────────────────────────────────────────────────
+# Only runs when IPERF_SERVER is set (same env var bench-app.sh uses).
+# Uses nc with a short timeout — if the port is unreachable, the OUTPUT chain
+# is likely blocking it (the VM firewall defaults to DROP).
+if [[ -n "${IPERF_SERVER:-}" ]]; then
+  _iperf_port="${IPERF_PORT:-5201}"
+  if ! nc -z -w2 "$IPERF_SERVER" "$_iperf_port" 2>/dev/null; then
+    echo "=== iperf3 firewall check ==="
+    echo "Cannot reach iperf3 server at $IPERF_SERVER:$_iperf_port"
+    echo "The VM firewall (default OUTPUT DROP) is likely blocking TCP port $_iperf_port."
+    echo ""
+
+    _should_unblock=false
+    if [[ "${BENCH_UNBLOCK_IPERF:-0}" == "1" ]]; then
+      _should_unblock=true
+    elif [[ -t 0 ]]; then
+      read -rp "Add iptables rules to allow iperf3 on port $_iperf_port? [Y/n] " _reply
+      if [[ -z "$_reply" || "$_reply" =~ ^[Yy] ]]; then
+        _should_unblock=true
+      fi
+    else
+      echo "WARNING: iperf3 test will likely fail — set BENCH_UNBLOCK_IPERF=1 to unblock automatically."
+    fi
+
+    if $_should_unblock; then
+      iptables -I OUTPUT 1 -p tcp --dport "$_iperf_port" -m comment --comment "bench-iperf3" -j ACCEPT
+      iptables -I INPUT 1 -p tcp --sport "$_iperf_port" -m comment --comment "bench-iperf3" -j ACCEPT
+      iptables -I OUTPUT 1 -p icmp -m comment --comment "bench-iperf3" -j ACCEPT
+      iptables -I INPUT 1 -p icmp -m comment --comment "bench-iperf3" -j ACCEPT
+      echo "iptables rules added — port $_iperf_port and ICMP unblocked (comment: bench-iperf3)."
+    fi
+    echo ""
+  fi
+fi
+
+APP_SERVICES=(
+  lighthouse
+  searcher-container
+  cvm-reverse-proxy
+  ssh-pubkey-server
+  input-only-proxy
+  delay-pipe
+)
+
+running=()
+for svc in "${APP_SERVICES[@]}"; do
+  if systemctl is-active --quiet "$svc" 2>/dev/null; then
+    running+=("$svc")
+  fi
+done
+
+if [[ ${#running[@]} -eq 0 ]]; then
+  echo "Pre-flight OK: no application services running."
+  exit 0
+fi
+
+echo "=== Pre-flight check ==="
+echo "Running application services that may skew results:"
+for svc in "${running[@]}"; do
+  pid=$(systemctl show -p MainPID --value "$svc" 2>/dev/null)
+  if [[ -n "$pid" && "$pid" != "0" ]]; then
+    cpu=$(ps -p "$pid" -o %cpu= 2>/dev/null || echo "?")
+    mem=$(ps -p "$pid" -o %mem= 2>/dev/null || echo "?")
+    printf "  %-30s CPU: %s%%  MEM: %s%%\n" "$svc" "${cpu// /}" "${mem// /}"
+  else
+    printf "  %-30s (no main PID)\n" "$svc"
+  fi
+done
+echo ""
+
+if [[ "${BENCH_STOP_SERVICES:-0}" == "1" ]]; then
+  should_stop=true
+elif [[ -t 0 ]]; then
+  read -rp "Stop these services before benchmarking? [Y/n] " reply
+  if [[ -z "$reply" || "$reply" =~ ^[Yy] ]]; then
+    should_stop=true
+  else
+    should_stop=false
+  fi
+else
+  echo "WARNING: benchmarking with application services active — results may be noisy."
+  echo "Set BENCH_STOP_SERVICES=1 to stop them automatically."
+  should_stop=false
+fi
+
+if $should_stop; then
+  for svc in "${running[@]}"; do
+    echo "Stopping $svc..."
+    systemctl stop "$svc"
+  done
+  echo "All application services stopped."
+else
+  echo "WARNING: benchmarking with application services active — results may be noisy."
+fi
diff --git a/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-warmup.sh b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-warmup.sh
new file mode 100755
index 00000000..6ee8f552
--- /dev/null
+++ b/mkosi.profiles/benchmark/mkosi.extra/usr/local/bin/bench-warmup.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Warmup: prime subsystems before benchmark iterations to avoid cold-start skew.
+#
+# Currently warms up:
+#   - iperf3: short run to establish SLIRP buffers and TCP state
+#   - stress-ng syscall
+#
+# Add future warmup steps here as needed.
+#
+# Environment variables:
+#   IPERF_SERVER — iperf3 server address (same as bench-app.sh); warmup skipped if unset
+#   IPERF_PORT   — iperf3 port (default: 5201)
set -euo pipefail
+
+IPERF_PORT="${IPERF_PORT:-5201}"
+
+# ── iperf3 ────────────────────────────────────────────────────────────────────
+if [[ -n "${IPERF_SERVER:-}" ]]; then
+  echo "Warming up iperf3 (5s, discarded)..."
+  iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 > /dev/null 2>&1 || true
+fi
+
+# ── stress-ng syscall ─────────────────────────────────────────────────────────
+# Iter 1 shows a 20x cold-boot spike (370 vs stable 17 ops/s). Running once
+# here consumes the anomaly so all measured iterations land at the stable value.
+echo "Warming up stress-ng syscall (5s, discarded)..."
+stress-ng --syscall 1 --timeout 5 > /dev/null 2>&1 || true
+
+echo "Warmup done."