#!/usr/bin/env python3 """ Benchmark runner for mainline - tests performance across effects and displays. Usage: python -m engine.benchmark python -m engine.benchmark --output report.md python -m engine.benchmark --displays terminal,websocket --effects glitch,fade """ import argparse import json import sys import time from dataclasses import dataclass, field from typing import Any import numpy as np @dataclass class BenchmarkResult: """Result of a single benchmark run.""" name: str display: str effect: str | None iterations: int total_time_ms: float avg_time_ms: float std_dev_ms: float min_ms: float max_ms: float fps: float chars_processed: int chars_per_sec: float @dataclass class BenchmarkReport: """Complete benchmark report.""" timestamp: str python_version: str results: list[BenchmarkResult] = field(default_factory=list) summary: dict[str, Any] = field(default_factory=dict) def get_sample_buffer(width: int = 80, height: int = 24) -> list[str]: """Generate a sample buffer for benchmarking.""" lines = [] for i in range(height): line = f"\x1b[32mLine {i}\x1b[0m " + "A" * (width - 10) lines.append(line) return lines def benchmark_display( display_class, buffer: list[str], iterations: int = 100 ) -> BenchmarkResult: """Benchmark a single display.""" display = display_class() display.init(80, 24) times = [] chars = sum(len(line) for line in buffer) for _ in range(iterations): t0 = time.perf_counter() display.show(buffer) elapsed = (time.perf_counter() - t0) * 1000 times.append(elapsed) display.cleanup() times_arr = np.array(times) return BenchmarkResult( name=f"display_{display_class.__name__}", display=display_class.__name__, effect=None, iterations=iterations, total_time_ms=sum(times), avg_time_ms=np.mean(times_arr), std_dev_ms=np.std(times_arr), min_ms=np.min(times_arr), max_ms=np.max(times_arr), fps=1000.0 / np.mean(times_arr) if np.mean(times_arr) > 0 else 0, chars_processed=chars * iterations, chars_per_sec=(chars * iterations) / (sum(times) / 1000) if sum(times) > 0 else 0, ) def benchmark_effect_with_display( effect_class, display, buffer: list[str], iterations: int = 100 ) -> BenchmarkResult: """Benchmark an effect with a display.""" effect = effect_class() effect.configure(enabled=True, intensity=1.0) times = [] chars = sum(len(line) for line in buffer) for _ in range(iterations): processed = effect.process(buffer) t0 = time.perf_counter() display.show(processed) elapsed = (time.perf_counter() - t0) * 1000 times.append(elapsed) display.cleanup() times_arr = np.array(times) return BenchmarkResult( name=f"effect_{effect_class.__name__}_with_{display.__class__.__name__}", display=display.__class__.__name__, effect=effect_class.__name__, iterations=iterations, total_time_ms=sum(times), avg_time_ms=np.mean(times_arr), std_dev_ms=np.std(times_arr), min_ms=np.min(times_arr), max_ms=np.max(times_arr), fps=1000.0 / np.mean(times_arr) if np.mean(times_arr) > 0 else 0, chars_processed=chars * iterations, chars_per_sec=(chars * iterations) / (sum(times) / 1000) if sum(times) > 0 else 0, ) def get_available_displays(): """Get available display classes.""" from engine.display import ( DisplayRegistry, NullDisplay, TerminalDisplay, ) from engine.display.backends.sixel import SixelDisplay DisplayRegistry.initialize() displays = [ ("null", NullDisplay), ("terminal", TerminalDisplay), ] try: from engine.display.backends.websocket import WebSocketDisplay displays.append(("websocket", WebSocketDisplay)) except Exception: pass try: displays.append(("sixel", SixelDisplay)) except Exception: pass return displays def get_available_effects(): """Get available effect classes.""" try: from engine.effects.registry import get_effect_registry except Exception: return [] effects = [] registry = get_effect_registry() for name in registry.list_effects(): effect = registry.get(name) if effect: effects.append((name, effect)) return effects def run_benchmarks( displays: list[tuple[str, Any]] | None = None, effects: list[tuple[str, Any]] | None = None, iterations: int = 100, output_format: str = "text", ) -> BenchmarkReport: """Run all benchmarks and return report.""" from datetime import datetime if displays is None: displays = get_available_displays() if effects is None: effects = get_available_effects() buffer = get_sample_buffer(80, 24) results = [] print(f"Running benchmarks ({iterations} iterations each)...") print() for name, display_class in displays: print(f"Benchmarking display: {name}") try: result = benchmark_display(display_class, buffer, iterations) results.append(result) print(f" {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg") except Exception as e: print(f" Error: {e}") print() for effect_name, effect_class in effects: for display_name, display_class in displays: if display_name == "websocket": continue print(f"Benchmarking effect: {effect_name} with {display_name}") try: display = display_class() display.init(80, 24) result = benchmark_effect_with_display( effect_class, display, buffer, iterations ) results.append(result) print(f" {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg") except Exception as e: print(f" Error: {e}") summary = generate_summary(results) return BenchmarkReport( timestamp=datetime.now().isoformat(), python_version=sys.version, results=results, summary=summary, ) def generate_summary(results: list[BenchmarkResult]) -> dict[str, Any]: """Generate summary statistics from results.""" by_display: dict[str, list[BenchmarkResult]] = {} by_effect: dict[str, list[BenchmarkResult]] = {} for r in results: if r.display not in by_display: by_display[r.display] = [] by_display[r.display].append(r) if r.effect: if r.effect not in by_effect: by_effect[r.effect] = [] by_effect[r.effect].append(r) summary = { "by_display": {}, "by_effect": {}, "overall": { "total_tests": len(results), "displays_tested": len(by_display), "effects_tested": len(by_effect), }, } for display, res in by_display.items(): fps_values = [r.fps for r in res] summary["by_display"][display] = { "avg_fps": np.mean(fps_values), "min_fps": np.min(fps_values), "max_fps": np.max(fps_values), "tests": len(res), } for effect, res in by_effect.items(): fps_values = [r.fps for r in res] summary["by_effect"][effect] = { "avg_fps": np.mean(fps_values), "min_fps": np.min(fps_values), "max_fps": np.max(fps_values), "tests": len(res), } return summary def format_report_text(report: BenchmarkReport) -> str: """Format report as human-readable text.""" lines = [ "# Mainline Performance Benchmark Report", "", f"Generated: {report.timestamp}", f"Python: {report.python_version}", "", "## Summary", "", f"Total tests: {report.summary['overall']['total_tests']}", f"Displays tested: {report.summary['overall']['displays_tested']}", f"Effects tested: {report.summary['overall']['effects_tested']}", "", "## By Display", "", ] for display, stats in report.summary["by_display"].items(): lines.append(f"### {display}") lines.append(f"- Avg FPS: {stats['avg_fps']:.1f}") lines.append(f"- Min FPS: {stats['min_fps']:.1f}") lines.append(f"- Max FPS: {stats['max_fps']:.1f}") lines.append(f"- Tests: {stats['tests']}") lines.append("") if report.summary["by_effect"]: lines.append("## By Effect") lines.append("") for effect, stats in report.summary["by_effect"].items(): lines.append(f"### {effect}") lines.append(f"- Avg FPS: {stats['avg_fps']:.1f}") lines.append(f"- Min FPS: {stats['min_fps']:.1f}") lines.append(f"- Max FPS: {stats['max_fps']:.1f}") lines.append(f"- Tests: {stats['tests']}") lines.append("") lines.append("## Detailed Results") lines.append("") lines.append("| Display | Effect | FPS | Avg ms | StdDev ms | Min ms | Max ms |") lines.append("|---------|--------|-----|--------|-----------|--------|--------|") for r in report.results: effect_col = r.effect if r.effect else "-" lines.append( f"| {r.display} | {effect_col} | {r.fps:.1f} | {r.avg_time_ms:.2f} | " f"{r.std_dev_ms:.2f} | {r.min_ms:.2f} | {r.max_ms:.2f} |" ) return "\n".join(lines) def format_report_json(report: BenchmarkReport) -> str: """Format report as JSON.""" data = { "timestamp": report.timestamp, "python_version": report.python_version, "summary": report.summary, "results": [ { "name": r.name, "display": r.display, "effect": r.effect, "iterations": r.iterations, "total_time_ms": r.total_time_ms, "avg_time_ms": r.avg_time_ms, "std_dev_ms": r.std_dev_ms, "min_ms": r.min_ms, "max_ms": r.max_ms, "fps": r.fps, "chars_processed": r.chars_processed, "chars_per_sec": r.chars_per_sec, } for r in report.results ], } return json.dumps(data, indent=2) def main(): parser = argparse.ArgumentParser(description="Run mainline benchmarks") parser.add_argument( "--displays", help="Comma-separated list of displays to test (default: all)", ) parser.add_argument( "--effects", help="Comma-separated list of effects to test (default: all)", ) parser.add_argument( "--iterations", type=int, default=100, help="Number of iterations per test (default: 100)", ) parser.add_argument( "--output", help="Output file path (default: stdout)", ) parser.add_argument( "--format", choices=["text", "json"], default="text", help="Output format (default: text)", ) args = parser.parse_args() displays = None if args.displays: display_map = dict(get_available_displays()) displays = [ (name, display_map[name]) for name in args.displays.split(",") if name in display_map ] effects = None if args.effects: effect_map = dict(get_available_effects()) effects = [ (name, effect_map[name]) for name in args.effects.split(",") if name in effect_map ] report = run_benchmarks(displays, effects, args.iterations, args.format) if args.format == "json": output = format_report_json(report) else: output = format_report_text(report) if args.output: with open(args.output, "w") as f: f.write(output) print(f"Report written to {args.output}") else: print(output) if __name__ == "__main__": main()