From c4adb9b483bc3b5d5f9cd6c0d440d3e5b819002b Mon Sep 17 00:00:00 2001 From: Arthur Pastel Date: Tue, 3 Jun 2025 17:18:38 +0200 Subject: [PATCH 1/2] feat: make sure the benchmark fixture can only be called once per bench --- src/pytest_codspeed/plugin.py | 20 +++++++++++++------- tests/test_pytest_plugin.py | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/pytest_codspeed/plugin.py b/src/pytest_codspeed/plugin.py index f3dfdc2..3a20675 100644 --- a/src/pytest_codspeed/plugin.py +++ b/src/pytest_codspeed/plugin.py @@ -318,14 +318,20 @@ def __init__(self, request: pytest.FixtureRequest): self.extra_info: dict = {} self._request = request - - def __call__(self, func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T: - config = self._request.config - plugin = get_plugin(config) - if plugin.is_codspeed_enabled: - return _measure(plugin, self._request.node, config, func, *args, **kwargs) + self._config = self._request.config + self._plugin = get_plugin(self._config) + self._called = False + + def __call__(self, target: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T: + if self._called: + raise RuntimeError("The benchmark fixture can only be used once per test") + self._called = True + if self._plugin.is_codspeed_enabled: + return _measure( + self._plugin, self._request.node, self._config, target, *args, **kwargs + ) else: - return func(*args, **kwargs) + return target(*args, **kwargs) @pytest.fixture(scope="function") diff --git a/tests/test_pytest_plugin.py b/tests/test_pytest_plugin.py index fa19362..8e7b776 100644 --- a/tests/test_pytest_plugin.py +++ b/tests/test_pytest_plugin.py @@ -338,3 +338,25 @@ def test_capsys(capsys): result.assert_outcomes(passed=1) result.stdout.no_fnmatch_line("*print to stdout*") result.stderr.no_fnmatch_line("*print to stderr*") + +@pytest.mark.parametrize("mode", [*MeasurementMode]) +def test_benchmark_fixture_used_twice( + pytester: pytest.Pytester, mode: MeasurementMode +) -> None: + """Test that using the benchmark fixture twice in a test raises an error.""" + pytester.makepyfile( + """ + def test_benchmark_used_twice(benchmark): + def foo(): + pass + + benchmark(foo) + benchmark(foo) + """ + ) + result = run_pytest_codspeed_with_mode(pytester, mode) + assert result.ret == 1, "the run should have failed" + result.stdout.fnmatch_lines( + ["*RuntimeError: The benchmark fixture can only be used once per test*"] + ) + From 96fe457ff3bba6c22194a5b7f94a683ef31bd727 Mon Sep 17 00:00:00 2001 From: Arthur Pastel Date: Thu, 5 Jun 2025 22:22:12 +0200 Subject: [PATCH 2/2] feat: support pytest-benchmark's pedantic API --- src/pytest_codspeed/config.py | 63 ++++- src/pytest_codspeed/instruments/__init__.py | 20 +- src/pytest_codspeed/instruments/valgrind.py | 56 ++++- src/pytest_codspeed/instruments/walltime.py | 216 ++++++++++-------- src/pytest_codspeed/plugin.py | 92 ++++++-- tests/test_pytest_plugin.py | 78 +++++++ .../test_pytest_plugin_cpu_instrumentation.py | 81 +++++++ tests/test_pytest_plugin_walltime.py | 51 +++++ 8 files changed, 530 insertions(+), 127 deletions(-) diff --git a/src/pytest_codspeed/config.py b/src/pytest_codspeed/config.py index 0d2d881..1932a8e 100644 --- a/src/pytest_codspeed/config.py +++ b/src/pytest_codspeed/config.py @@ -1,9 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import TYPE_CHECKING +import dataclasses +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Generic, TypeVar + +T = TypeVar("T") if TYPE_CHECKING: + from typing import Any, Callable + import pytest @@ -64,17 +69,51 @@ def from_pytest_item(cls, item: pytest.Item) -> BenchmarkMarkerOptions: raise ValueError( "Positional arguments are not allowed in the benchmark marker" ) + kwargs = marker.kwargs - options = cls( - group=marker.kwargs.pop("group", None), - min_time=marker.kwargs.pop("min_time", None), - max_time=marker.kwargs.pop("max_time", None), - max_rounds=marker.kwargs.pop("max_rounds", None), - ) - - if len(marker.kwargs) > 0: + unknown_kwargs = set(kwargs.keys()) - { + field.name for field in dataclasses.fields(cls) + } + if unknown_kwargs: raise ValueError( "Unknown kwargs passed to benchmark marker: " - + ", ".join(marker.kwargs.keys()) + + ", ".join(sorted(unknown_kwargs)) ) - return options + + return cls(**kwargs) + + +@dataclass(frozen=True) +class PedanticOptions(Generic[T]): + """Parameters for running a benchmark using the pedantic fixture API.""" + + target: Callable[..., T] + setup: Callable[[], Any | None] | None + teardown: Callable[..., Any | None] | None + rounds: int + warmup_rounds: int + iterations: int + args: tuple[Any, ...] = field(default_factory=tuple) + kwargs: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if self.rounds < 0: + raise ValueError("rounds must be positive") + if self.warmup_rounds < 0: + raise ValueError("warmup_rounds must be non-negative") + if self.iterations <= 0: + raise ValueError("iterations must be positive") + if self.iterations > 1 and self.setup is not None: + raise ValueError( + "setup cannot be used with multiple iterations, use multiple rounds" + ) + + def setup_and_get_args_kwargs(self) -> tuple[tuple[Any, ...], dict[str, Any]]: + if self.setup is None: + return self.args, self.kwargs + maybe_result = self.setup(*self.args, **self.kwargs) + if maybe_result is not None: + if len(self.args) > 0 or len(self.kwargs) > 0: + raise ValueError("setup cannot return a value when args are provided") + return maybe_result + return self.args, self.kwargs diff --git a/src/pytest_codspeed/instruments/__init__.py b/src/pytest_codspeed/instruments/__init__.py index d163783..fb264f1 100644 --- a/src/pytest_codspeed/instruments/__init__.py +++ b/src/pytest_codspeed/instruments/__init__.py @@ -5,15 +5,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, ClassVar, ParamSpec, TypeVar + from typing import Any, Callable, ClassVar, TypeVar import pytest - from pytest_codspeed.config import BenchmarkMarkerOptions + from pytest_codspeed.config import BenchmarkMarkerOptions, PedanticOptions from pytest_codspeed.plugin import CodSpeedConfig T = TypeVar("T") - P = ParamSpec("P") class Instrument(metaclass=ABCMeta): @@ -31,9 +30,18 @@ def measure( marker_options: BenchmarkMarkerOptions, name: str, uri: str, - fn: Callable[P, T], - *args: P.args, - **kwargs: P.kwargs, + fn: Callable[..., T], + *args: tuple, + **kwargs: dict[str, Any], + ) -> T: ... + + @abstractmethod + def measure_pedantic( + self, + marker_options: BenchmarkMarkerOptions, + pedantic_options: PedanticOptions[T], + name: str, + uri: str, ) -> T: ... @abstractmethod diff --git a/src/pytest_codspeed/instruments/valgrind.py b/src/pytest_codspeed/instruments/valgrind.py index 4476a19..9f135f4 100644 --- a/src/pytest_codspeed/instruments/valgrind.py +++ b/src/pytest_codspeed/instruments/valgrind.py @@ -1,6 +1,7 @@ from __future__ import annotations import sys +import warnings from typing import TYPE_CHECKING from pytest_codspeed import __semver_version__ @@ -12,7 +13,8 @@ from pytest import Session - from pytest_codspeed.instruments import P, T + from pytest_codspeed.config import PedanticOptions + from pytest_codspeed.instruments import T from pytest_codspeed.plugin import BenchmarkMarkerOptions, CodSpeedConfig SUPPORTS_PERF_TRAMPOLINE = sys.version_info >= (3, 12) @@ -52,9 +54,9 @@ def measure( marker_options: BenchmarkMarkerOptions, name: str, uri: str, - fn: Callable[P, T], - *args: P.args, - **kwargs: P.kwargs, + fn: Callable[..., T], + *args: tuple, + **kwargs: dict[str, Any], ) -> T: self.benchmark_count += 1 @@ -78,8 +80,54 @@ def __codspeed_root_frame__() -> T: self.instrument_hooks.lib.callgrind_stop_instrumentation() self.instrument_hooks.set_executed_benchmark(uri) + def measure_pedantic( + self, + marker_options: BenchmarkMarkerOptions, + pedantic_options: PedanticOptions[T], + name: str, + uri: str, + ) -> T: + if pedantic_options.rounds != 1 or pedantic_options.iterations != 1: + warnings.warn( + "Valgrind instrument ignores rounds and iterations settings " + "in pedantic mode" + ) + if not self.instrument_hooks: + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + out = pedantic_options.target(*args, **kwargs) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + return out + + def __codspeed_root_frame__(*args, **kwargs) -> T: + return pedantic_options.target(*args, **kwargs) + + # Warmup + warmup_rounds = max( + pedantic_options.warmup_rounds, 1 if SUPPORTS_PERF_TRAMPOLINE else 0 + ) + for _ in range(warmup_rounds): + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + __codspeed_root_frame__(*args, **kwargs) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + + # Compute the actual result of the function + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + self.instrument_hooks.lib.callgrind_start_instrumentation() + try: + out = __codspeed_root_frame__(*args, **kwargs) + finally: + self.instrument_hooks.lib.callgrind_stop_instrumentation() + self.instrument_hooks.set_executed_benchmark(uri) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + + return out + def report(self, session: Session) -> None: reporter = session.config.pluginmanager.get_plugin("terminalreporter") + assert reporter is not None, "terminalreporter not found" count_suffix = "benchmarked" if self.should_measure else "benchmark tested" reporter.write_sep( "=", diff --git a/src/pytest_codspeed/instruments/walltime.py b/src/pytest_codspeed/instruments/walltime.py index 28f2412..a992477 100644 --- a/src/pytest_codspeed/instruments/walltime.py +++ b/src/pytest_codspeed/instruments/walltime.py @@ -22,7 +22,8 @@ from pytest import Session - from pytest_codspeed.instruments import P, T + from pytest_codspeed.config import PedanticOptions + from pytest_codspeed.instruments import T from pytest_codspeed.plugin import BenchmarkMarkerOptions, CodSpeedConfig DEFAULT_WARMUP_TIME_NS = 1_000_000_000 @@ -153,80 +154,6 @@ class Benchmark: stats: BenchmarkStats -def run_benchmark( - instrument_hooks: InstrumentHooks | None, - name: str, - uri: str, - fn: Callable[P, T], - args, - kwargs, - config: BenchmarkConfig, -) -> tuple[Benchmark, T]: - def __codspeed_root_frame__() -> T: - return fn(*args, **kwargs) - - # Compute the actual result of the function - out = __codspeed_root_frame__() - - # Warmup - times_per_round_ns: list[float] = [] - warmup_start = start = perf_counter_ns() - while True: - start = perf_counter_ns() - __codspeed_root_frame__() - end = perf_counter_ns() - times_per_round_ns.append(end - start) - if end - warmup_start > config.warmup_time_ns: - break - - # Round sizing - warmup_mean_ns = mean(times_per_round_ns) - warmup_iters = len(times_per_round_ns) - times_per_round_ns.clear() - iter_per_round = ( - int(ceil(config.min_round_time_ns / warmup_mean_ns)) - if warmup_mean_ns <= config.min_round_time_ns - else 1 - ) - if config.max_rounds is None: - round_time_ns = warmup_mean_ns * iter_per_round - rounds = int(config.max_time_ns / round_time_ns) - else: - rounds = config.max_rounds - rounds = max(1, rounds) - - # Benchmark - iter_range = range(iter_per_round) - run_start = perf_counter_ns() - if instrument_hooks: - instrument_hooks.start_benchmark() - for _ in range(rounds): - start = perf_counter_ns() - for _ in iter_range: - __codspeed_root_frame__() - end = perf_counter_ns() - times_per_round_ns.append(end - start) - - if end - run_start > config.max_time_ns: - # TODO: log something - break - if instrument_hooks: - instrument_hooks.stop_benchmark() - instrument_hooks.set_executed_benchmark(uri) - benchmark_end = perf_counter_ns() - total_time = (benchmark_end - run_start) / 1e9 - - stats = BenchmarkStats.from_list( - times_per_round_ns, - rounds=rounds, - total_time=total_time, - iter_per_round=iter_per_round, - warmup_iters=warmup_iters, - ) - - return Benchmark(name=name, uri=uri, config=config, stats=stats), out - - class WallTimeInstrument(Instrument): instrument = "walltime" instrument_hooks: InstrumentHooks | None @@ -253,26 +180,137 @@ def measure( marker_options: BenchmarkMarkerOptions, name: str, uri: str, - fn: Callable[P, T], - *args: P.args, - **kwargs: P.kwargs, + fn: Callable[..., T], + *args: tuple, + **kwargs: dict[str, Any], ) -> T: - bench, out = run_benchmark( - instrument_hooks=self.instrument_hooks, - name=name, - uri=uri, - fn=fn, - args=args, - kwargs=kwargs, - config=BenchmarkConfig.from_codspeed_config_and_marker_data( - self.config, marker_options - ), + benchmark_config = BenchmarkConfig.from_codspeed_config_and_marker_data( + self.config, marker_options + ) + + def __codspeed_root_frame__() -> T: + return fn(*args, **kwargs) + + # Compute the actual result of the function + out = __codspeed_root_frame__() + + # Warmup + times_per_round_ns: list[float] = [] + warmup_start = start = perf_counter_ns() + while True: + start = perf_counter_ns() + __codspeed_root_frame__() + end = perf_counter_ns() + times_per_round_ns.append(end - start) + if end - warmup_start > benchmark_config.warmup_time_ns: + break + + # Round sizing + warmup_mean_ns = mean(times_per_round_ns) + warmup_iters = len(times_per_round_ns) + times_per_round_ns.clear() + iter_per_round = ( + int(ceil(benchmark_config.min_round_time_ns / warmup_mean_ns)) + if warmup_mean_ns <= benchmark_config.min_round_time_ns + else 1 + ) + if benchmark_config.max_rounds is None: + round_time_ns = warmup_mean_ns * iter_per_round + rounds = int(benchmark_config.max_time_ns / round_time_ns) + else: + rounds = benchmark_config.max_rounds + rounds = max(1, rounds) + + # Benchmark + iter_range = range(iter_per_round) + run_start = perf_counter_ns() + for _ in range(rounds): + start = perf_counter_ns() + for _ in iter_range: + __codspeed_root_frame__() + end = perf_counter_ns() + times_per_round_ns.append(end - start) + + if end - run_start > benchmark_config.max_time_ns: + # TODO: log something + break + benchmark_end = perf_counter_ns() + total_time = (benchmark_end - run_start) / 1e9 + + stats = BenchmarkStats.from_list( + times_per_round_ns, + rounds=rounds, + total_time=total_time, + iter_per_round=iter_per_round, + warmup_iters=warmup_iters, + ) + + self.benchmarks.append( + Benchmark(name=name, uri=uri, config=benchmark_config, stats=stats) + ) + return out + + def measure_pedantic( + self, + marker_options: BenchmarkMarkerOptions, + pedantic_options: PedanticOptions[T], + name: str, + uri: str, + ) -> T: + benchmark_config = BenchmarkConfig.from_codspeed_config_and_marker_data( + self.config, marker_options + ) + + def __codspeed_root_frame__(*args, **kwargs) -> T: + return pedantic_options.target(*args, **kwargs) + + iter_range = range(pedantic_options.iterations) + + # Warmup + for _ in range(pedantic_options.warmup_rounds): + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + for _ in iter_range: + __codspeed_root_frame__(*args, **kwargs) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + + # Benchmark + times_per_round_ns: list[float] = [] + benchmark_start = perf_counter_ns() + for _ in range(pedantic_options.rounds): + start = perf_counter_ns() + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + for _ in iter_range: + __codspeed_root_frame__(*args, **kwargs) + end = perf_counter_ns() + times_per_round_ns.append(end - start) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + + benchmark_end = perf_counter_ns() + total_time = (benchmark_end - benchmark_start) / 1e9 + stats = BenchmarkStats.from_list( + times_per_round_ns, + rounds=pedantic_options.rounds, + total_time=total_time, + iter_per_round=pedantic_options.iterations, + warmup_iters=pedantic_options.warmup_rounds, + ) + + # Compute the actual result of the function + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + out = __codspeed_root_frame__(*args, **kwargs) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + + self.benchmarks.append( + Benchmark(name=name, uri=uri, config=benchmark_config, stats=stats) ) - self.benchmarks.append(bench) return out def report(self, session: Session) -> None: reporter = session.config.pluginmanager.get_plugin("terminalreporter") + assert reporter is not None, "terminalreporter not found" if len(self.benchmarks) == 0: reporter.write_sep( diff --git a/src/pytest_codspeed/plugin.py b/src/pytest_codspeed/plugin.py index 3a20675..16f0f06 100644 --- a/src/pytest_codspeed/plugin.py +++ b/src/pytest_codspeed/plugin.py @@ -14,7 +14,11 @@ import pytest from _pytest.fixtures import FixtureManager -from pytest_codspeed.config import BenchmarkMarkerOptions, CodSpeedConfig +from pytest_codspeed.config import ( + BenchmarkMarkerOptions, + CodSpeedConfig, + PedanticOptions, +) from pytest_codspeed.instruments import ( MeasurementMode, get_instrument_from_mode, @@ -27,12 +31,11 @@ from . import __version__ if TYPE_CHECKING: - from typing import Callable, ParamSpec, TypeVar + from typing import Any, Callable, TypeVar from pytest_codspeed.instruments import Instrument T = TypeVar("T") - P = ParamSpec("P") IS_PYTEST_BENCHMARK_INSTALLED = importlib.util.find_spec("pytest_benchmark") is not None IS_PYTEST_SPEED_INSTALLED = importlib.util.find_spec("pytest_speed") is not None @@ -137,14 +140,14 @@ def pytest_configure(config: pytest.Config): profile_folder = os.environ.get("CODSPEED_PROFILE_FOLDER") - codspeedconfig = CodSpeedConfig.from_pytest_config(config) + codspeed_config = CodSpeedConfig.from_pytest_config(config) plugin = CodSpeedPlugin( disabled_plugins=tuple(disabled_plugins), is_codspeed_enabled=is_codspeed_enabled, mode=mode, - instrument=instrument(codspeedconfig), - config=codspeedconfig, + instrument=instrument(codspeed_config), + config=codspeed_config, profile_folder=Path(profile_folder) if profile_folder else None, ) config.pluginmanager.register(plugin, PLUGIN_NAME) @@ -235,9 +238,10 @@ def _measure( plugin: CodSpeedPlugin, node: pytest.Item, config: pytest.Config, - fn: Callable[P, T], - *args: P.args, - **kwargs: P.kwargs, + pedantic_options: PedanticOptions | None, + fn: Callable[..., T], + args: tuple[Any, ...], + kwargs: dict[str, Any], ) -> T: marker_options = BenchmarkMarkerOptions.from_pytest_item(node) random.seed(0) @@ -247,7 +251,14 @@ def _measure( gc.disable() try: uri, name = get_git_relative_uri_and_name(node.nodeid, config.rootpath) - return plugin.instrument.measure(marker_options, name, uri, fn, *args, **kwargs) + if pedantic_options is None: + return plugin.instrument.measure( + marker_options, name, uri, fn, *args, **kwargs + ) + else: + return plugin.instrument.measure_pedantic( + marker_options, pedantic_options, name, uri + ) finally: # Ensure GC is re-enabled even if the test failed if is_gc_enabled: @@ -258,11 +269,11 @@ def wrap_runtest( plugin: CodSpeedPlugin, node: pytest.Item, config: pytest.Config, - fn: Callable[P, T], -) -> Callable[P, T]: + fn: Callable[..., T], +) -> Callable[..., T]: @functools.wraps(fn) - def wrapped(*args: P.args, **kwargs: P.kwargs) -> T: - return _measure(plugin, node, config, fn, *args, **kwargs) + def wrapped(*args: tuple, **kwargs: dict[str, Any]) -> T: + return _measure(plugin, node, config, None, fn, args, kwargs) return wrapped @@ -322,17 +333,66 @@ def __init__(self, request: pytest.FixtureRequest): self._plugin = get_plugin(self._config) self._called = False - def __call__(self, target: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T: + def __call__( + self, target: Callable[..., T], *args: tuple, **kwargs: dict[str, Any] + ) -> T: if self._called: raise RuntimeError("The benchmark fixture can only be used once per test") self._called = True if self._plugin.is_codspeed_enabled: return _measure( - self._plugin, self._request.node, self._config, target, *args, **kwargs + self._plugin, + self._request.node, + self._config, + None, + target, + args, + kwargs, ) else: return target(*args, **kwargs) + def pedantic( + self, + target: Callable[..., T], + args: tuple[Any, ...] = (), + kwargs: dict[str, Any] = {}, + setup: Callable | None = None, + teardown: Callable | None = None, + rounds: int = 1, + warmup_rounds: int = 0, + iterations: int = 1, + ): + if self._called: + raise RuntimeError("The benchmark fixture can only be used once per test") + self._called = True + pedantic_options = PedanticOptions( + target=target, + args=args, + kwargs=kwargs, + setup=setup, + teardown=teardown, + rounds=rounds, + warmup_rounds=warmup_rounds, + iterations=iterations, + ) + if self._plugin.is_codspeed_enabled: + return _measure( + self._plugin, + self._request.node, + self._config, + pedantic_options, + target, + args, + kwargs, + ) + else: + args, kwargs = pedantic_options.setup_and_get_args_kwargs() + result = target(*args, **kwargs) + if pedantic_options.teardown is not None: + pedantic_options.teardown(*args, **kwargs) + return result + @pytest.fixture(scope="function") def codspeed_benchmark(request: pytest.FixtureRequest) -> Callable: diff --git a/tests/test_pytest_plugin.py b/tests/test_pytest_plugin.py index 8e7b776..89cd7b1 100644 --- a/tests/test_pytest_plugin.py +++ b/tests/test_pytest_plugin.py @@ -339,6 +339,63 @@ def test_capsys(capsys): result.stdout.no_fnmatch_line("*print to stdout*") result.stderr.no_fnmatch_line("*print to stderr*") + +@pytest.mark.xfail(reason="not supported by pytest-benchmark, see #78") +@pytest.mark.parametrize("mode", [*MeasurementMode]) +def test_stateful_warmup_fixture( + pytester: pytest.Pytester, mode: MeasurementMode +) -> None: + """Test that the stateful warmup works correctly.""" + pytester.makepyfile( + """ + import pytest + + def test_stateful_warmup(benchmark): + has_run = False + + def b(): + nonlocal has_run + assert not has_run, "Benchmark ran multiple times without setup" + has_run = True + + benchmark(b) + """ + ) + result = run_pytest_codspeed_with_mode(pytester, mode) + assert result.ret == 0, "the run should have succeeded" + result.assert_outcomes(passed=1) + + +@pytest.mark.xfail(reason="not supported by pytest-benchmark, see #78") +@pytest.mark.parametrize("mode", [*MeasurementMode]) +def test_stateful_warmup_marker( + pytester: pytest.Pytester, mode: MeasurementMode +) -> None: + """Test that the stateful warmup marker works correctly.""" + pytester.makepyfile( + """ + import pytest + + has_run = False + + @pytest.fixture(autouse=True) + def fixture(): + global has_run + has_run = False + + + @pytest.mark.benchmark + def test_stateful_warmup_marker(): + global has_run + assert not has_run, "Benchmark ran multiple times without setup" + has_run = True + """ + ) + result = run_pytest_codspeed_with_mode(pytester, mode) + assert result.ret == 0, "the run should have succeeded" + result.assert_outcomes(passed=1) + + @pytest.mark.parametrize("mode", [*MeasurementMode]) def test_benchmark_fixture_used_twice( pytester: pytest.Pytester, mode: MeasurementMode @@ -360,3 +417,24 @@ def foo(): ["*RuntimeError: The benchmark fixture can only be used once per test*"] ) + +@pytest.mark.parametrize("mode", [*MeasurementMode]) +def test_benchmark_fixture_used_normal_pedantic( + pytester: pytest.Pytester, mode: MeasurementMode +) -> None: + """Test that using the benchmark fixture twice in a test raises an error.""" + pytester.makepyfile( + """ + def test_benchmark_used_twice(benchmark): + def foo(): + pass + + benchmark(foo) + benchmark.pedantic(foo) + """ + ) + result = run_pytest_codspeed_with_mode(pytester, mode) + assert result.ret == 1, "the run should have failed" + result.stdout.fnmatch_lines( + ["*RuntimeError: The benchmark fixture can only be used once per test*"] + ) diff --git a/tests/test_pytest_plugin_cpu_instrumentation.py b/tests/test_pytest_plugin_cpu_instrumentation.py index d72ca4a..4856ae6 100644 --- a/tests/test_pytest_plugin_cpu_instrumentation.py +++ b/tests/test_pytest_plugin_cpu_instrumentation.py @@ -116,3 +116,84 @@ def test_my_stuff(benchmark, i): result = pytester.runpytest("--codspeed", "-n", "128") assert result.ret == 0, "the run should have succeeded" result.stdout.fnmatch_lines(["*256 passed*"]) + + +def test_valgrind_pedantic_warning(pytester: pytest.Pytester) -> None: + """ + Test that using pedantic mode with Valgrind instrumentation shows a warning about + ignoring rounds and iterations. + """ + pytester.makepyfile( + """ + def test_benchmark_pedantic(benchmark): + def foo(): + return 1 + 1 + + benchmark.pedantic(foo, rounds=10, iterations=100) + """ + ) + result = run_pytest_codspeed_with_mode(pytester, MeasurementMode.Instrumentation) + result.stdout.fnmatch_lines( + [ + "*UserWarning: Valgrind instrument ignores rounds and iterations settings " + "in pedantic mode*" + ] + ) + result.assert_outcomes(passed=1) + + +@skip_without_valgrind +@skip_without_perf_trampoline +def test_benchmark_pedantic_instrumentation( + pytester: pytest.Pytester, codspeed_env +) -> None: + """Test that pedantic mode works with instrumentation mode.""" + pytester.makepyfile( + """ + def test_pedantic_full_features(benchmark): + setup_calls = 0 + teardown_calls = 0 + target_calls = 0 + + def setup(): + nonlocal setup_calls + setup_calls += 1 + return (1, 2), {"c": 3} + + def teardown(a, b, c): + nonlocal teardown_calls + teardown_calls += 1 + assert a == 1 + assert b == 2 + assert c == 3 + + def target(a, b, c): + nonlocal target_calls + target_calls += 1 + assert a == 1 + assert b == 2 + assert c == 3 + return a + b + c + + result = benchmark.pedantic( + target, + setup=setup, + teardown=teardown, + rounds=3, + warmup_rounds=3 + ) + + # Verify the results + # Instrumentation ignores rounds but is called during warmup + assert result == 6 # 1 + 2 + 3 + assert setup_calls == 1 + 3 + assert teardown_calls == 1 + 3 + assert target_calls == 1 + 3 + """ + ) + with codspeed_env(): + result = run_pytest_codspeed_with_mode( + pytester, MeasurementMode.Instrumentation + ) + assert result.ret == 0, "the run should have succeeded" + result.assert_outcomes(passed=1) diff --git a/tests/test_pytest_plugin_walltime.py b/tests/test_pytest_plugin_walltime.py index 7e86ca4..510ab30 100644 --- a/tests/test_pytest_plugin_walltime.py +++ b/tests/test_pytest_plugin_walltime.py @@ -35,3 +35,54 @@ def test_my_stuff(benchmark, inp): "*3 benchmarked*", ] ) + + +def test_benchmark_pedantic_walltime( + pytester: pytest.Pytester, +) -> None: + """Test that pedantic mode works with walltime mode.""" + pytester.makepyfile( + """ + def test_pedantic_full_features(benchmark): + setup_calls = 0 + teardown_calls = 0 + target_calls = 0 + + def setup(): + nonlocal setup_calls + setup_calls += 1 + return (1, 2), {"c": 3} + + def teardown(a, b, c): + nonlocal teardown_calls + teardown_calls += 1 + assert a == 1 + assert b == 2 + assert c == 3 + + def target(a, b, c): + nonlocal target_calls + target_calls += 1 + assert a == 1 + assert b == 2 + assert c == 3 + return a + b + c + + result = benchmark.pedantic( + target, + setup=setup, + teardown=teardown, + rounds=3, + warmup_rounds=1 + ) + + # Verify the results + assert result == 6 # 1 + 2 + 3 + assert setup_calls == 5 # 3 rounds + 1 warmup + 1 calibration + assert teardown_calls == 5 + assert target_calls == 5 + """ + ) + result = run_pytest_codspeed_with_mode(pytester, MeasurementMode.WallTime) + assert result.ret == 0, "the run should have succeeded" + result.assert_outcomes(passed=1)