From c4adb9b483bc3b5d5f9cd6c0d440d3e5b819002b Mon Sep 17 00:00:00 2001
From: Arthur Pastel <arthur.pastel@gmail.com>
Date: Tue, 3 Jun 2025 17:18:38 +0200
Subject: [PATCH 1/2] feat: make sure the benchmark fixture can only be called
 once per bench

---
 src/pytest_codspeed/plugin.py | 20 +++++++++++++-------
 tests/test_pytest_plugin.py   | 22 ++++++++++++++++++++++
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/pytest_codspeed/plugin.py b/src/pytest_codspeed/plugin.py
index f3dfdc2..3a20675 100644
--- a/src/pytest_codspeed/plugin.py
+++ b/src/pytest_codspeed/plugin.py
@@ -318,14 +318,20 @@ def __init__(self, request: pytest.FixtureRequest):
         self.extra_info: dict = {}
 
         self._request = request
-
-    def __call__(self, func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
-        config = self._request.config
-        plugin = get_plugin(config)
-        if plugin.is_codspeed_enabled:
-            return _measure(plugin, self._request.node, config, func, *args, **kwargs)
+        self._config = self._request.config
+        self._plugin = get_plugin(self._config)
+        self._called = False
+
+    def __call__(self, target: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
+        if self._called:
+            raise RuntimeError("The benchmark fixture can only be used once per test")
+        self._called = True
+        if self._plugin.is_codspeed_enabled:
+            return _measure(
+                self._plugin, self._request.node, self._config, target, *args, **kwargs
+            )
         else:
-            return func(*args, **kwargs)
+            return target(*args, **kwargs)
 
 
 @pytest.fixture(scope="function")
diff --git a/tests/test_pytest_plugin.py b/tests/test_pytest_plugin.py
index fa19362..8e7b776 100644
--- a/tests/test_pytest_plugin.py
+++ b/tests/test_pytest_plugin.py
@@ -338,3 +338,25 @@ def test_capsys(capsys):
     result.assert_outcomes(passed=1)
     result.stdout.no_fnmatch_line("*print to stdout*")
     result.stderr.no_fnmatch_line("*print to stderr*")
+
+@pytest.mark.parametrize("mode", [*MeasurementMode])
+def test_benchmark_fixture_used_twice(
+    pytester: pytest.Pytester, mode: MeasurementMode
+) -> None:
+    """Test that using the benchmark fixture twice in a test raises an error."""
+    pytester.makepyfile(
+        """
+        def test_benchmark_used_twice(benchmark):
+            def foo():
+                pass
+
+            benchmark(foo)
+            benchmark(foo)
+        """
+    )
+    result = run_pytest_codspeed_with_mode(pytester, mode)
+    assert result.ret == 1, "the run should have failed"
+    result.stdout.fnmatch_lines(
+        ["*RuntimeError: The benchmark fixture can only be used once per test*"]
+    )
+

From 96fe457ff3bba6c22194a5b7f94a683ef31bd727 Mon Sep 17 00:00:00 2001
From: Arthur Pastel <arthur.pastel@gmail.com>
Date: Thu, 5 Jun 2025 22:22:12 +0200
Subject: [PATCH 2/2] feat: support pytest-benchmark's pedantic API

---
 src/pytest_codspeed/config.py                 |  63 ++++-
 src/pytest_codspeed/instruments/__init__.py   |  20 +-
 src/pytest_codspeed/instruments/valgrind.py   |  56 ++++-
 src/pytest_codspeed/instruments/walltime.py   | 216 ++++++++++--------
 src/pytest_codspeed/plugin.py                 |  92 ++++++--
 tests/test_pytest_plugin.py                   |  78 +++++++
 .../test_pytest_plugin_cpu_instrumentation.py |  81 +++++++
 tests/test_pytest_plugin_walltime.py          |  51 +++++
 8 files changed, 530 insertions(+), 127 deletions(-)

diff --git a/src/pytest_codspeed/config.py b/src/pytest_codspeed/config.py
index 0d2d881..1932a8e 100644
--- a/src/pytest_codspeed/config.py
+++ b/src/pytest_codspeed/config.py
@@ -1,9 +1,14 @@
 from __future__ import annotations
 
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
+import dataclasses
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+T = TypeVar("T")
 
 if TYPE_CHECKING:
+    from typing import Any, Callable
+
     import pytest
 
 
@@ -64,17 +69,51 @@ def from_pytest_item(cls, item: pytest.Item) -> BenchmarkMarkerOptions:
             raise ValueError(
                 "Positional arguments are not allowed in the benchmark marker"
             )
+        kwargs = marker.kwargs
 
-        options = cls(
-            group=marker.kwargs.pop("group", None),
-            min_time=marker.kwargs.pop("min_time", None),
-            max_time=marker.kwargs.pop("max_time", None),
-            max_rounds=marker.kwargs.pop("max_rounds", None),
-        )
-
-        if len(marker.kwargs) > 0:
+        unknown_kwargs = set(kwargs.keys()) - {
+            field.name for field in dataclasses.fields(cls)
+        }
+        if unknown_kwargs:
             raise ValueError(
                 "Unknown kwargs passed to benchmark marker: "
-                + ", ".join(marker.kwargs.keys())
+                + ", ".join(sorted(unknown_kwargs))
             )
-        return options
+
+        return cls(**kwargs)
+
+
+@dataclass(frozen=True)
+class PedanticOptions(Generic[T]):
+    """Parameters for running a benchmark using the pedantic fixture API."""
+
+    target: Callable[..., T]  # function under measurement
+    setup: Callable[[], Any | None] | None  # may return an (args, kwargs) pair
+    teardown: Callable[..., Any | None] | None  # called with the target's arguments
+    rounds: int  # must be at least 1
+    warmup_rounds: int  # may be 0
+    iterations: int  # must be at least 1
+    args: tuple[Any, ...] = field(default_factory=tuple)
+    kwargs: dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        # Zero rounds is rejected too: there would be no sample to report.
+        if self.rounds <= 0:
+            raise ValueError("rounds must be positive")
+        if self.warmup_rounds < 0:
+            raise ValueError("warmup_rounds must be non-negative")
+        if self.iterations <= 0:
+            raise ValueError("iterations must be positive")
+        if self.iterations > 1 and self.setup is not None:
+            raise ValueError(
+                "setup cannot be used with multiple iterations, use multiple rounds"
+            )
+
+    def setup_and_get_args_kwargs(self) -> tuple[tuple[Any, ...], dict[str, Any]]:
+        """Run ``setup`` (if any) and return the ``(args, kwargs)`` for the target."""
+        result = None if self.setup is None else self.setup(*self.args, **self.kwargs)
+        if result is None:
+            return self.args, self.kwargs
+        if self.args or self.kwargs:
+            raise ValueError("setup cannot return a value when args are provided")
+        return result
diff --git a/src/pytest_codspeed/instruments/__init__.py b/src/pytest_codspeed/instruments/__init__.py
index d163783..fb264f1 100644
--- a/src/pytest_codspeed/instruments/__init__.py
+++ b/src/pytest_codspeed/instruments/__init__.py
@@ -5,15 +5,14 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Any, Callable, ClassVar, ParamSpec, TypeVar
+    from typing import Any, Callable, ClassVar, TypeVar
 
     import pytest
 
-    from pytest_codspeed.config import BenchmarkMarkerOptions
+    from pytest_codspeed.config import BenchmarkMarkerOptions, PedanticOptions
     from pytest_codspeed.plugin import CodSpeedConfig
 
     T = TypeVar("T")
-    P = ParamSpec("P")
 
 
 class Instrument(metaclass=ABCMeta):
@@ -31,9 +30,18 @@ def measure(
         marker_options: BenchmarkMarkerOptions,
         name: str,
         uri: str,
-        fn: Callable[P, T],
-        *args: P.args,
-        **kwargs: P.kwargs,
+        fn: Callable[..., T],
+        *args: tuple,
+        **kwargs: dict[str, Any],
+    ) -> T: ...
+
+    @abstractmethod
+    def measure_pedantic(
+        self,
+        marker_options: BenchmarkMarkerOptions,
+        pedantic_options: PedanticOptions[T],
+        name: str,
+        uri: str,
     ) -> T: ...
 
     @abstractmethod
diff --git a/src/pytest_codspeed/instruments/valgrind.py b/src/pytest_codspeed/instruments/valgrind.py
index 4476a19..9f135f4 100644
--- a/src/pytest_codspeed/instruments/valgrind.py
+++ b/src/pytest_codspeed/instruments/valgrind.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import sys
+import warnings
 from typing import TYPE_CHECKING
 
 from pytest_codspeed import __semver_version__
@@ -12,7 +13,8 @@
 
     from pytest import Session
 
-    from pytest_codspeed.instruments import P, T
+    from pytest_codspeed.config import PedanticOptions
+    from pytest_codspeed.instruments import T
     from pytest_codspeed.plugin import BenchmarkMarkerOptions, CodSpeedConfig
 
 SUPPORTS_PERF_TRAMPOLINE = sys.version_info >= (3, 12)
@@ -52,9 +54,9 @@ def measure(
         marker_options: BenchmarkMarkerOptions,
         name: str,
         uri: str,
-        fn: Callable[P, T],
-        *args: P.args,
-        **kwargs: P.kwargs,
+        fn: Callable[..., T],
+        *args: tuple,
+        **kwargs: dict[str, Any],
     ) -> T:
         self.benchmark_count += 1
 
@@ -78,8 +80,64 @@ def __codspeed_root_frame__() -> T:
             self.instrument_hooks.lib.callgrind_stop_instrumentation()
             self.instrument_hooks.set_executed_benchmark(uri)
 
+    def measure_pedantic(
+        self,
+        marker_options: BenchmarkMarkerOptions,
+        pedantic_options: PedanticOptions[T],
+        name: str,
+        uri: str,
+    ) -> T:
+        """Measure a pedantic benchmark with a single instrumented target call.
+
+        ``rounds`` and ``iterations`` are ignored (a warning is emitted when they
+        are not 1). Without instrument hooks the target just runs once between
+        setup and teardown; otherwise it is instrumented once after the warmup
+        rounds, with setup and teardown kept outside the instrumented region.
+        """
+        # Count pedantic benchmarks the same way ``measure`` does.
+        self.benchmark_count += 1
+
+        if pedantic_options.rounds != 1 or pedantic_options.iterations != 1:
+            warnings.warn(
+                "Valgrind instrument ignores rounds and iterations settings "
+                "in pedantic mode"
+            )
+        if not self.instrument_hooks:
+            args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+            out = pedantic_options.target(*args, **kwargs)
+            if pedantic_options.teardown is not None:
+                pedantic_options.teardown(*args, **kwargs)
+            return out
+
+        def __codspeed_root_frame__(*args, **kwargs) -> T:
+            return pedantic_options.target(*args, **kwargs)
+
+        # Warmup
+        warmup_rounds = max(
+            pedantic_options.warmup_rounds, 1 if SUPPORTS_PERF_TRAMPOLINE else 0
+        )
+        for _ in range(warmup_rounds):
+            args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+            __codspeed_root_frame__(*args, **kwargs)
+            if pedantic_options.teardown is not None:
+                pedantic_options.teardown(*args, **kwargs)
+
+        # Compute the actual result of the function
+        args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+        self.instrument_hooks.lib.callgrind_start_instrumentation()
+        try:
+            out = __codspeed_root_frame__(*args, **kwargs)
+        finally:
+            self.instrument_hooks.lib.callgrind_stop_instrumentation()
+            self.instrument_hooks.set_executed_benchmark(uri)
+            if pedantic_options.teardown is not None:
+                pedantic_options.teardown(*args, **kwargs)
+
+        return out
+
     def report(self, session: Session) -> None:
         reporter = session.config.pluginmanager.get_plugin("terminalreporter")
+        assert reporter is not None, "terminalreporter not found"
         count_suffix = "benchmarked" if self.should_measure else "benchmark tested"
         reporter.write_sep(
             "=",
diff --git a/src/pytest_codspeed/instruments/walltime.py b/src/pytest_codspeed/instruments/walltime.py
index 28f2412..a992477 100644
--- a/src/pytest_codspeed/instruments/walltime.py
+++ b/src/pytest_codspeed/instruments/walltime.py
@@ -22,7 +22,8 @@
 
     from pytest import Session
 
-    from pytest_codspeed.instruments import P, T
+    from pytest_codspeed.config import PedanticOptions
+    from pytest_codspeed.instruments import T
     from pytest_codspeed.plugin import BenchmarkMarkerOptions, CodSpeedConfig
 
 DEFAULT_WARMUP_TIME_NS = 1_000_000_000
@@ -153,80 +154,6 @@ class Benchmark:
     stats: BenchmarkStats
 
 
-def run_benchmark(
-    instrument_hooks: InstrumentHooks | None,
-    name: str,
-    uri: str,
-    fn: Callable[P, T],
-    args,
-    kwargs,
-    config: BenchmarkConfig,
-) -> tuple[Benchmark, T]:
-    def __codspeed_root_frame__() -> T:
-        return fn(*args, **kwargs)
-
-    # Compute the actual result of the function
-    out = __codspeed_root_frame__()
-
-    # Warmup
-    times_per_round_ns: list[float] = []
-    warmup_start = start = perf_counter_ns()
-    while True:
-        start = perf_counter_ns()
-        __codspeed_root_frame__()
-        end = perf_counter_ns()
-        times_per_round_ns.append(end - start)
-        if end - warmup_start > config.warmup_time_ns:
-            break
-
-    # Round sizing
-    warmup_mean_ns = mean(times_per_round_ns)
-    warmup_iters = len(times_per_round_ns)
-    times_per_round_ns.clear()
-    iter_per_round = (
-        int(ceil(config.min_round_time_ns / warmup_mean_ns))
-        if warmup_mean_ns <= config.min_round_time_ns
-        else 1
-    )
-    if config.max_rounds is None:
-        round_time_ns = warmup_mean_ns * iter_per_round
-        rounds = int(config.max_time_ns / round_time_ns)
-    else:
-        rounds = config.max_rounds
-    rounds = max(1, rounds)
-
-    # Benchmark
-    iter_range = range(iter_per_round)
-    run_start = perf_counter_ns()
-    if instrument_hooks:
-        instrument_hooks.start_benchmark()
-    for _ in range(rounds):
-        start = perf_counter_ns()
-        for _ in iter_range:
-            __codspeed_root_frame__()
-        end = perf_counter_ns()
-        times_per_round_ns.append(end - start)
-
-        if end - run_start > config.max_time_ns:
-            # TODO: log something
-            break
-    if instrument_hooks:
-        instrument_hooks.stop_benchmark()
-        instrument_hooks.set_executed_benchmark(uri)
-    benchmark_end = perf_counter_ns()
-    total_time = (benchmark_end - run_start) / 1e9
-
-    stats = BenchmarkStats.from_list(
-        times_per_round_ns,
-        rounds=rounds,
-        total_time=total_time,
-        iter_per_round=iter_per_round,
-        warmup_iters=warmup_iters,
-    )
-
-    return Benchmark(name=name, uri=uri, config=config, stats=stats), out
-
-
 class WallTimeInstrument(Instrument):
     instrument = "walltime"
     instrument_hooks: InstrumentHooks | None
@@ -253,26 +180,160 @@ def measure(
         marker_options: BenchmarkMarkerOptions,
         name: str,
         uri: str,
-        fn: Callable[P, T],
-        *args: P.args,
-        **kwargs: P.kwargs,
+        fn: Callable[..., T],
+        *args: tuple,
+        **kwargs: dict[str, Any],
     ) -> T:
-        bench, out = run_benchmark(
-            instrument_hooks=self.instrument_hooks,
-            name=name,
-            uri=uri,
-            fn=fn,
-            args=args,
-            kwargs=kwargs,
-            config=BenchmarkConfig.from_codspeed_config_and_marker_data(
-                self.config, marker_options
-            ),
+        """Benchmark ``fn(*args, **kwargs)`` and record its wall-time statistics.
+
+        The function is called once for its result, then repeatedly during a
+        timed warmup used to size the rounds, then for the measured rounds.
+        """
+        benchmark_config = BenchmarkConfig.from_codspeed_config_and_marker_data(
+            self.config, marker_options
+        )
+
+        def __codspeed_root_frame__() -> T:
+            return fn(*args, **kwargs)
+
+        # Compute the actual result of the function
+        out = __codspeed_root_frame__()
+
+        # Warmup
+        times_per_round_ns: list[float] = []
+        warmup_start = start = perf_counter_ns()
+        while True:
+            start = perf_counter_ns()
+            __codspeed_root_frame__()
+            end = perf_counter_ns()
+            times_per_round_ns.append(end - start)
+            if end - warmup_start > benchmark_config.warmup_time_ns:
+                break
+
+        # Round sizing
+        warmup_mean_ns = mean(times_per_round_ns)
+        warmup_iters = len(times_per_round_ns)
+        times_per_round_ns.clear()
+        iter_per_round = (
+            int(ceil(benchmark_config.min_round_time_ns / warmup_mean_ns))
+            if warmup_mean_ns <= benchmark_config.min_round_time_ns
+            else 1
+        )
+        if benchmark_config.max_rounds is None:
+            round_time_ns = warmup_mean_ns * iter_per_round
+            rounds = int(benchmark_config.max_time_ns / round_time_ns)
+        else:
+            rounds = benchmark_config.max_rounds
+        rounds = max(1, rounds)
+
+        # Benchmark
+        iter_range = range(iter_per_round)
+        run_start = perf_counter_ns()
+        if self.instrument_hooks:
+            self.instrument_hooks.start_benchmark()
+        for _ in range(rounds):
+            start = perf_counter_ns()
+            for _ in iter_range:
+                __codspeed_root_frame__()
+            end = perf_counter_ns()
+            times_per_round_ns.append(end - start)
+
+            if end - run_start > benchmark_config.max_time_ns:
+                # TODO: log something
+                break
+        if self.instrument_hooks:
+            self.instrument_hooks.stop_benchmark()
+            self.instrument_hooks.set_executed_benchmark(uri)
+        benchmark_end = perf_counter_ns()
+        total_time = (benchmark_end - run_start) / 1e9
+
+        stats = BenchmarkStats.from_list(
+            times_per_round_ns,
+            rounds=rounds,
+            total_time=total_time,
+            iter_per_round=iter_per_round,
+            warmup_iters=warmup_iters,
+        )
+
+        self.benchmarks.append(
+            Benchmark(name=name, uri=uri, config=benchmark_config, stats=stats)
+        )
+        return out
+
+    def measure_pedantic(
+        self,
+        marker_options: BenchmarkMarkerOptions,
+        pedantic_options: PedanticOptions[T],
+        name: str,
+        uri: str,
+    ) -> T:
+        """Benchmark a target with explicit rounds, iterations, setup and teardown.
+
+        Runs ``warmup_rounds`` unmeasured rounds, then ``rounds`` measured rounds
+        of ``iterations`` target calls each, and finally one extra call whose
+        return value is handed back to the caller. Setup and teardown run once
+        per round and are excluded from the recorded round times.
+        """
+        benchmark_config = BenchmarkConfig.from_codspeed_config_and_marker_data(
+            self.config, marker_options
+        )
+
+        def __codspeed_root_frame__(*args, **kwargs) -> T:
+            return pedantic_options.target(*args, **kwargs)
+
+        iter_range = range(pedantic_options.iterations)
+
+        # Warmup
+        for _ in range(pedantic_options.warmup_rounds):
+            args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+            for _ in iter_range:
+                __codspeed_root_frame__(*args, **kwargs)
+            if pedantic_options.teardown is not None:
+                pedantic_options.teardown(*args, **kwargs)
+
+        # Benchmark
+        times_per_round_ns: list[float] = []
+        benchmark_start = perf_counter_ns()
+        if self.instrument_hooks:
+            self.instrument_hooks.start_benchmark()
+        for _ in range(pedantic_options.rounds):
+            # Prepare the round before starting the clock: setup is not measured.
+            args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+            start = perf_counter_ns()
+            for _ in iter_range:
+                __codspeed_root_frame__(*args, **kwargs)
+            end = perf_counter_ns()
+            times_per_round_ns.append(end - start)
+            if pedantic_options.teardown is not None:
+                pedantic_options.teardown(*args, **kwargs)
+        if self.instrument_hooks:
+            self.instrument_hooks.stop_benchmark()
+            self.instrument_hooks.set_executed_benchmark(uri)
+
+        benchmark_end = perf_counter_ns()
+        total_time = (benchmark_end - benchmark_start) / 1e9
+        stats = BenchmarkStats.from_list(
+            times_per_round_ns,
+            rounds=pedantic_options.rounds,
+            total_time=total_time,
+            iter_per_round=pedantic_options.iterations,
+            warmup_iters=pedantic_options.warmup_rounds,
+        )
+
+        # Compute the actual result of the function
+        args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+        out = __codspeed_root_frame__(*args, **kwargs)
+        if pedantic_options.teardown is not None:
+            pedantic_options.teardown(*args, **kwargs)
+
+        self.benchmarks.append(
+            Benchmark(name=name, uri=uri, config=benchmark_config, stats=stats)
         )
-        self.benchmarks.append(bench)
         return out
 
     def report(self, session: Session) -> None:
         reporter = session.config.pluginmanager.get_plugin("terminalreporter")
+        assert reporter is not None, "terminalreporter not found"
 
         if len(self.benchmarks) == 0:
             reporter.write_sep(
diff --git a/src/pytest_codspeed/plugin.py b/src/pytest_codspeed/plugin.py
index 3a20675..16f0f06 100644
--- a/src/pytest_codspeed/plugin.py
+++ b/src/pytest_codspeed/plugin.py
@@ -14,7 +14,11 @@
 import pytest
 from _pytest.fixtures import FixtureManager
 
-from pytest_codspeed.config import BenchmarkMarkerOptions, CodSpeedConfig
+from pytest_codspeed.config import (
+    BenchmarkMarkerOptions,
+    CodSpeedConfig,
+    PedanticOptions,
+)
 from pytest_codspeed.instruments import (
     MeasurementMode,
     get_instrument_from_mode,
@@ -27,12 +31,11 @@
 from . import __version__
 
 if TYPE_CHECKING:
-    from typing import Callable, ParamSpec, TypeVar
+    from typing import Any, Callable, TypeVar
 
     from pytest_codspeed.instruments import Instrument
 
     T = TypeVar("T")
-    P = ParamSpec("P")
 
 IS_PYTEST_BENCHMARK_INSTALLED = importlib.util.find_spec("pytest_benchmark") is not None
 IS_PYTEST_SPEED_INSTALLED = importlib.util.find_spec("pytest_speed") is not None
@@ -137,14 +140,14 @@ def pytest_configure(config: pytest.Config):
 
     profile_folder = os.environ.get("CODSPEED_PROFILE_FOLDER")
 
-    codspeedconfig = CodSpeedConfig.from_pytest_config(config)
+    codspeed_config = CodSpeedConfig.from_pytest_config(config)
 
     plugin = CodSpeedPlugin(
         disabled_plugins=tuple(disabled_plugins),
         is_codspeed_enabled=is_codspeed_enabled,
         mode=mode,
-        instrument=instrument(codspeedconfig),
-        config=codspeedconfig,
+        instrument=instrument(codspeed_config),
+        config=codspeed_config,
         profile_folder=Path(profile_folder) if profile_folder else None,
     )
     config.pluginmanager.register(plugin, PLUGIN_NAME)
@@ -235,9 +238,10 @@ def _measure(
     plugin: CodSpeedPlugin,
     node: pytest.Item,
     config: pytest.Config,
-    fn: Callable[P, T],
-    *args: P.args,
-    **kwargs: P.kwargs,
+    pedantic_options: PedanticOptions | None,
+    fn: Callable[..., T],
+    args: tuple[Any, ...],
+    kwargs: dict[str, Any],
 ) -> T:
     marker_options = BenchmarkMarkerOptions.from_pytest_item(node)
     random.seed(0)
@@ -247,7 +251,14 @@ def _measure(
         gc.disable()
     try:
         uri, name = get_git_relative_uri_and_name(node.nodeid, config.rootpath)
-        return plugin.instrument.measure(marker_options, name, uri, fn, *args, **kwargs)
+        if pedantic_options is None:
+            return plugin.instrument.measure(
+                marker_options, name, uri, fn, *args, **kwargs
+            )
+        else:
+            return plugin.instrument.measure_pedantic(
+                marker_options, pedantic_options, name, uri
+            )
     finally:
         # Ensure GC is re-enabled even if the test failed
         if is_gc_enabled:
@@ -258,11 +269,11 @@ def wrap_runtest(
     plugin: CodSpeedPlugin,
     node: pytest.Item,
     config: pytest.Config,
-    fn: Callable[P, T],
-) -> Callable[P, T]:
+    fn: Callable[..., T],
+) -> Callable[..., T]:
     @functools.wraps(fn)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> T:
-        return _measure(plugin, node, config, fn, *args, **kwargs)
+    def wrapped(*args: tuple, **kwargs: dict[str, Any]) -> T:
+        return _measure(plugin, node, config, None, fn, args, kwargs)
 
     return wrapped
 
@@ -322,17 +333,77 @@ def __init__(self, request: pytest.FixtureRequest):
         self._plugin = get_plugin(self._config)
         self._called = False
 
-    def __call__(self, target: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
+    def __call__(self, target: Callable[..., T], *args: Any, **kwargs: Any) -> T:
+        """Benchmark ``target(*args, **kwargs)`` and return its result."""
         if self._called:
             raise RuntimeError("The benchmark fixture can only be used once per test")
         self._called = True
         if self._plugin.is_codspeed_enabled:
             return _measure(
-                self._plugin, self._request.node, self._config, target, *args, **kwargs
+                self._plugin,
+                self._request.node,
+                self._config,
+                None,
+                target,
+                args,
+                kwargs,
             )
         else:
             return target(*args, **kwargs)
 
+    def pedantic(
+        self,
+        target: Callable[..., T],
+        args: tuple[Any, ...] = (),
+        kwargs: dict[str, Any] | None = None,
+        setup: Callable | None = None,
+        teardown: Callable | None = None,
+        rounds: int = 1,
+        warmup_rounds: int = 0,
+        iterations: int = 1,
+    ) -> T:
+        """Benchmark ``target`` with explicit rounds, iterations, setup and teardown.
+
+        Mirrors the ``pedantic`` method of pytest-benchmark's fixture. When
+        CodSpeed is disabled, the target simply runs once between ``setup`` and
+        ``teardown``.
+
+        Raises:
+            RuntimeError: if the fixture was already used in this test.
+            ValueError: if the options are invalid (see ``PedanticOptions``).
+        """
+        if self._called:
+            raise RuntimeError("The benchmark fixture can only be used once per test")
+        self._called = True
+        # Build a fresh dict per call instead of sharing one mutable default.
+        kwargs = {} if kwargs is None else kwargs
+        pedantic_options = PedanticOptions(
+            target=target,
+            args=args,
+            kwargs=kwargs,
+            setup=setup,
+            teardown=teardown,
+            rounds=rounds,
+            warmup_rounds=warmup_rounds,
+            iterations=iterations,
+        )
+        if self._plugin.is_codspeed_enabled:
+            return _measure(
+                self._plugin,
+                self._request.node,
+                self._config,
+                pedantic_options,
+                target,
+                args,
+                kwargs,
+            )
+        else:
+            args, kwargs = pedantic_options.setup_and_get_args_kwargs()
+            result = target(*args, **kwargs)
+            if pedantic_options.teardown is not None:
+                pedantic_options.teardown(*args, **kwargs)
+            return result
+
 
 @pytest.fixture(scope="function")
 def codspeed_benchmark(request: pytest.FixtureRequest) -> Callable:
diff --git a/tests/test_pytest_plugin.py b/tests/test_pytest_plugin.py
index 8e7b776..89cd7b1 100644
--- a/tests/test_pytest_plugin.py
+++ b/tests/test_pytest_plugin.py
@@ -339,6 +339,63 @@ def test_capsys(capsys):
     result.stdout.no_fnmatch_line("*print to stdout*")
     result.stderr.no_fnmatch_line("*print to stderr*")
 
+
+@pytest.mark.xfail(reason="not supported by pytest-benchmark, see #78")
+@pytest.mark.parametrize("mode", [*MeasurementMode])
+def test_stateful_warmup_fixture(
+    pytester: pytest.Pytester, mode: MeasurementMode
+) -> None:
+    """Test that the stateful warmup works correctly."""
+    pytester.makepyfile(
+        """
+        import pytest
+
+        def test_stateful_warmup(benchmark):
+            has_run = False
+
+            def b():
+                nonlocal has_run
+                assert not has_run, "Benchmark ran multiple times without setup"
+                has_run = True
+
+            benchmark(b)
+        """
+    )
+    result = run_pytest_codspeed_with_mode(pytester, mode)
+    assert result.ret == 0, "the run should have succeeded"
+    result.assert_outcomes(passed=1)
+
+
+@pytest.mark.xfail(reason="not supported by pytest-benchmark, see #78")
+@pytest.mark.parametrize("mode", [*MeasurementMode])
+def test_stateful_warmup_marker(
+    pytester: pytest.Pytester, mode: MeasurementMode
+) -> None:
+    """Test that the stateful warmup marker works correctly."""
+    pytester.makepyfile(
+        """
+        import pytest
+
+        has_run = False
+
+        @pytest.fixture(autouse=True)
+        def fixture():
+            global has_run
+            has_run = False
+
+
+        @pytest.mark.benchmark
+        def test_stateful_warmup_marker():
+            global has_run
+            assert not has_run, "Benchmark ran multiple times without setup"
+            has_run = True
+        """
+    )
+    result = run_pytest_codspeed_with_mode(pytester, mode)
+    assert result.ret == 0, "the run should have succeeded"
+    result.assert_outcomes(passed=1)
+
+
 @pytest.mark.parametrize("mode", [*MeasurementMode])
 def test_benchmark_fixture_used_twice(
     pytester: pytest.Pytester, mode: MeasurementMode
@@ -360,3 +417,24 @@ def foo():
         ["*RuntimeError: The benchmark fixture can only be used once per test*"]
     )
 
+
+@pytest.mark.parametrize("mode", [*MeasurementMode])
+def test_benchmark_fixture_used_normal_pedantic(
+    pytester: pytest.Pytester, mode: MeasurementMode
+) -> None:
+    """Test that calling the fixture and then its pedantic method raises an error."""
+    pytester.makepyfile(
+        """
+        def test_benchmark_used_twice(benchmark):
+            def foo():
+                pass
+
+            benchmark(foo)
+            benchmark.pedantic(foo)
+        """
+    )
+    result = run_pytest_codspeed_with_mode(pytester, mode)
+    assert result.ret == 1, "the run should have failed"
+    result.stdout.fnmatch_lines(
+        ["*RuntimeError: The benchmark fixture can only be used once per test*"]
+    )
diff --git a/tests/test_pytest_plugin_cpu_instrumentation.py b/tests/test_pytest_plugin_cpu_instrumentation.py
index d72ca4a..4856ae6 100644
--- a/tests/test_pytest_plugin_cpu_instrumentation.py
+++ b/tests/test_pytest_plugin_cpu_instrumentation.py
@@ -116,3 +116,84 @@ def test_my_stuff(benchmark, i):
             result = pytester.runpytest("--codspeed", "-n", "128")
         assert result.ret == 0, "the run should have succeeded"
         result.stdout.fnmatch_lines(["*256 passed*"])
+
+
+def test_valgrind_pedantic_warning(pytester: pytest.Pytester) -> None:
+    """
+    Test that using pedantic mode with Valgrind instrumentation shows a warning about
+    ignoring rounds and iterations.
+    """
+    pytester.makepyfile(
+        """
+        def test_benchmark_pedantic(benchmark):
+            def foo():
+                return 1 + 1
+
+            benchmark.pedantic(foo, rounds=10, iterations=100)
+        """
+    )
+    result = run_pytest_codspeed_with_mode(pytester, MeasurementMode.Instrumentation)
+    result.stdout.fnmatch_lines(
+        [
+            "*UserWarning: Valgrind instrument ignores rounds and iterations settings "
+            "in pedantic mode*"
+        ]
+    )
+    result.assert_outcomes(passed=1)
+
+
+@skip_without_valgrind
+@skip_without_perf_trampoline
+def test_benchmark_pedantic_instrumentation(
+    pytester: pytest.Pytester, codspeed_env
+) -> None:
+    """Test that pedantic mode works with instrumentation mode."""
+    pytester.makepyfile(
+        """
+        def test_pedantic_full_features(benchmark):
+            setup_calls = 0
+            teardown_calls = 0
+            target_calls = 0
+
+            def setup():
+                nonlocal setup_calls
+                setup_calls += 1
+                return (1, 2), {"c": 3}
+
+            def teardown(a, b, c):
+                nonlocal teardown_calls
+                teardown_calls += 1
+                assert a == 1
+                assert b == 2
+                assert c == 3
+
+            def target(a, b, c):
+                nonlocal target_calls
+                target_calls += 1
+                assert a == 1
+                assert b == 2
+                assert c == 3
+                return a + b + c
+
+            result = benchmark.pedantic(
+                target,
+                setup=setup,
+                teardown=teardown,
+                rounds=3,
+                warmup_rounds=3
+            )
+
+            # Verify the results
+            # Instrumentation ignores rounds but is called during warmup
+            assert result == 6  # 1 + 2 + 3
+            assert setup_calls == 1 + 3
+            assert teardown_calls == 1 + 3
+            assert target_calls == 1 + 3
+        """
+    )
+    with codspeed_env():
+        result = run_pytest_codspeed_with_mode(
+            pytester, MeasurementMode.Instrumentation
+        )
+    assert result.ret == 0, "the run should have succeeded"
+    result.assert_outcomes(passed=1)
diff --git a/tests/test_pytest_plugin_walltime.py b/tests/test_pytest_plugin_walltime.py
index 7e86ca4..510ab30 100644
--- a/tests/test_pytest_plugin_walltime.py
+++ b/tests/test_pytest_plugin_walltime.py
@@ -35,3 +35,54 @@ def test_my_stuff(benchmark, inp):
             "*3 benchmarked*",
         ]
     )
+
+
+def test_benchmark_pedantic_walltime(
+    pytester: pytest.Pytester,
+) -> None:
+    """Test that pedantic mode works with walltime mode."""
+    pytester.makepyfile(
+        """
+        def test_pedantic_full_features(benchmark):
+            setup_calls = 0
+            teardown_calls = 0
+            target_calls = 0
+
+            def setup():
+                nonlocal setup_calls
+                setup_calls += 1
+                return (1, 2), {"c": 3}
+
+            def teardown(a, b, c):
+                nonlocal teardown_calls
+                teardown_calls += 1
+                assert a == 1
+                assert b == 2
+                assert c == 3
+
+            def target(a, b, c):
+                nonlocal target_calls
+                target_calls += 1
+                assert a == 1
+                assert b == 2
+                assert c == 3
+                return a + b + c
+
+            result = benchmark.pedantic(
+                target,
+                setup=setup,
+                teardown=teardown,
+                rounds=3,
+                warmup_rounds=1
+            )
+
+            # Verify the results
+            assert result == 6  # 1 + 2 + 3
+            assert setup_calls == 5  # 3 rounds + 1 warmup + 1 calibration
+            assert teardown_calls == 5
+            assert target_calls == 5
+        """
+    )
+    result = run_pytest_codspeed_with_mode(pytester, MeasurementMode.WallTime)
+    assert result.ret == 0, "the run should have succeeded"
+    result.assert_outcomes(passed=1)