refactor: modularize display backends and add benchmark runner

- Create engine/display/ package with registry pattern
- Move displays to engine/display/backends/ (terminal, null, websocket, sixel)
- Add DisplayRegistry with auto-discovery
- Add benchmark.py for performance testing effects × displays matrix
- Add mise tasks: benchmark, benchmark-json, benchmark-report
- Update controller to use new display module
2026-03-15 22:25:28 -07:00
parent 22dd063baa
commit 829c4ab63d
10 changed files with 694 additions and 150 deletions
--- a/engine/benchmark.py
+++ b/engine/benchmark.py
@@ -0,0 +1,431 @@
+#!/usr/bin/env python3
+"""
+Benchmark runner for mainline - tests performance across effects and displays.
+
+Usage:
+    python -m engine.benchmark
+    python -m engine.benchmark --output report.md
+    python -m engine.benchmark --displays terminal,websocket --effects glitch,fade
+"""
+
+import argparse
+import json
+import sys
+import time
+from dataclasses import dataclass, field
+from typing import Any
+
+import numpy as np
+
+
+@dataclass
+class BenchmarkResult:
+    """Result of a single benchmark run.
+
+    One instance is produced per benchmarked display, or per
+    effect/display pair, by the benchmark functions in this module.
+    """
+
+    # Identifier built by the benchmark functions, e.g. "display_<Class>" or
+    # "effect_<Effect>_with_<Display>".
+    name: str
+    # Class name of the display backend that was exercised.
+    display: str
+    # Class name of the effect, or None for a display-only run.
+    effect: str | None
+    # Number of timed iterations.
+    iterations: int
+    # Sum of all per-iteration timings, in milliseconds.
+    total_time_ms: float
+    # Mean per-iteration timing, in milliseconds.
+    avg_time_ms: float
+    # Standard deviation of the per-iteration timings, in milliseconds.
+    std_dev_ms: float
+    # Fastest single iteration, in milliseconds.
+    min_ms: float
+    # Slowest single iteration, in milliseconds.
+    max_ms: float
+    # Frames per second derived from the mean timing (1000 / avg_time_ms).
+    fps: float
+    # Characters in the input buffer multiplied by the iteration count.
+    chars_processed: int
+    # chars_processed divided by the total time in seconds.
+    chars_per_sec: float
+
+
+@dataclass
+class BenchmarkReport:
+    """Complete benchmark report: run metadata, raw results and a summary."""
+
+    # When the report was created; run_benchmarks stores
+    # datetime.now().isoformat() here.
+    timestamp: str
+    # Interpreter version string; run_benchmarks stores sys.version here.
+    python_version: str
+    # One entry per benchmark that completed.
+    results: list[BenchmarkResult] = field(default_factory=list)
+    # Aggregated statistics; run_benchmarks stores the output of
+    # generate_summary() here.
+    summary: dict[str, Any] = field(default_factory=dict)
+
+
+def get_sample_buffer(width: int = 80, height: int = 24) -> list[str]:
+    """Build the synthetic frame used as benchmark input.
+
+    Each of the ``height`` rows consists of an ANSI-coloured ``Line <row>``
+    label, a reset sequence and a space, followed by ``width - 10`` letters
+    ``A`` (no filler at all once ``width`` is 10 or less).
+    """
+    filler = "A" * (width - 10)
+    return [f"\x1b[32mLine {row}\x1b[0m " + filler for row in range(height)]
+
+
+def benchmark_display(
+    display_class, buffer: list[str], iterations: int = 100
+) -> BenchmarkResult:
+    """Benchmark a single display backend.
+
+    Instantiates ``display_class``, initialises it as an 80x24 surface and
+    times ``iterations`` consecutive ``show(buffer)`` calls.
+
+    Args:
+        display_class: Zero-argument callable returning an object that
+            provides ``init(width, height)``, ``show(buffer)`` and
+            ``cleanup()``.
+        buffer: Lines handed to ``show`` on every iteration.
+        iterations: Number of timed ``show`` calls; must be at least 1.
+
+    Returns:
+        A ``BenchmarkResult`` whose ``effect`` is ``None``.
+
+    Raises:
+        ValueError: If ``iterations`` is less than 1.
+    """
+    if iterations < 1:
+        # min/max/mean are undefined for an empty sample; fail with a clear
+        # message instead of an opaque NumPy reduction error.
+        raise ValueError("iterations must be at least 1")
+
+    display = display_class()
+    display.init(80, 24)
+
+    times = []
+    try:
+        for _ in range(iterations):
+            t0 = time.perf_counter()
+            display.show(buffer)
+            times.append((time.perf_counter() - t0) * 1000)
+    finally:
+        # Release the backend even when show() raises, so a failing run does
+        # not leave the display initialised.
+        display.cleanup()
+
+    times_arr = np.array(times)
+    total_ms = sum(times)
+    # Convert NumPy scalars to plain floats so the result matches the
+    # dataclass annotations and serialises without surprises.
+    avg_ms = float(np.mean(times_arr))
+    chars_processed = sum(len(line) for line in buffer) * iterations
+
+    return BenchmarkResult(
+        name=f"display_{display_class.__name__}",
+        display=display_class.__name__,
+        effect=None,
+        iterations=iterations,
+        total_time_ms=total_ms,
+        avg_time_ms=avg_ms,
+        std_dev_ms=float(np.std(times_arr)),
+        min_ms=float(np.min(times_arr)),
+        max_ms=float(np.max(times_arr)),
+        fps=1000.0 / avg_ms if avg_ms > 0 else 0.0,
+        chars_processed=chars_processed,
+        chars_per_sec=chars_processed / (total_ms / 1000) if total_ms > 0 else 0.0,
+    )
+
+
+def benchmark_effect_with_display(
+    effect_class, display, buffer: list[str], iterations: int = 100
+) -> BenchmarkResult:
+    """Benchmark an effect rendered through a display.
+
+    Each timed iteration covers both ``effect.process(buffer)`` and
+    ``display.show(processed)``, so the reported figures include the cost of
+    the effect itself rather than only the display call.
+
+    Args:
+        effect_class: Zero-argument callable returning an object that
+            provides ``configure(enabled=..., intensity=...)`` and
+            ``process(buffer)``.
+        display: An already initialised display object. This function calls
+            its ``cleanup()`` before returning or raising.
+        buffer: Lines fed to the effect on every iteration.
+        iterations: Number of timed iterations; must be at least 1.
+
+    Returns:
+        A ``BenchmarkResult`` naming both the effect and the display class.
+
+    Raises:
+        ValueError: If ``iterations`` is less than 1.
+    """
+    times = []
+    try:
+        if iterations < 1:
+            # min/max/mean are undefined for an empty sample.
+            raise ValueError("iterations must be at least 1")
+
+        effect = effect_class()
+        effect.configure(enabled=True, intensity=1.0)
+
+        for _ in range(iterations):
+            # Start the clock before the effect runs; otherwise only the
+            # display call would be measured.
+            t0 = time.perf_counter()
+            processed = effect.process(buffer)
+            display.show(processed)
+            times.append((time.perf_counter() - t0) * 1000)
+    finally:
+        # The caller hands over an initialised display and relies on this
+        # function to release it, including on failure.
+        display.cleanup()
+
+    times_arr = np.array(times)
+    total_ms = sum(times)
+    # Convert NumPy scalars to plain floats so the result matches the
+    # dataclass annotations and serialises without surprises.
+    avg_ms = float(np.mean(times_arr))
+    chars_processed = sum(len(line) for line in buffer) * iterations
+
+    return BenchmarkResult(
+        name=f"effect_{effect_class.__name__}_with_{display.__class__.__name__}",
+        display=display.__class__.__name__,
+        effect=effect_class.__name__,
+        iterations=iterations,
+        total_time_ms=total_ms,
+        avg_time_ms=avg_ms,
+        std_dev_ms=float(np.std(times_arr)),
+        min_ms=float(np.min(times_arr)),
+        max_ms=float(np.max(times_arr)),
+        fps=1000.0 / avg_ms if avg_ms > 0 else 0.0,
+        chars_processed=chars_processed,
+        chars_per_sec=chars_processed / (total_ms / 1000) if total_ms > 0 else 0.0,
+    )
+
+
+def get_available_displays():
+    """Return the display backends that can be benchmarked.
+
+    The null and terminal displays are always included. The websocket and
+    sixel backends are optional: each is added only when its module imports
+    successfully.
+
+    Returns:
+        A list of ``(name, display_class)`` tuples.
+    """
+    from engine.display import (
+        DisplayRegistry,
+        NullDisplay,
+        TerminalDisplay,
+    )
+
+    DisplayRegistry.initialize()
+
+    displays = [
+        ("null", NullDisplay),
+        ("terminal", TerminalDisplay),
+    ]
+
+    # Optional backends are best-effort: the import is the step that can
+    # fail, so it is the only statement guarded. A backend that cannot be
+    # imported is simply left out of the benchmark matrix.
+    try:
+        from engine.display.backends.websocket import WebSocketDisplay
+    except Exception:
+        pass
+    else:
+        displays.append(("websocket", WebSocketDisplay))
+
+    try:
+        from engine.display.backends.sixel import SixelDisplay
+    except Exception:
+        pass
+    else:
+        displays.append(("sixel", SixelDisplay))
+
+    return displays
+
+
+def get_available_effects():
+    """Return ``(name, effect)`` pairs for every effect the registry yields.
+
+    An empty list is returned when the effect registry module cannot be
+    imported. Names for which the registry's ``get`` returns a falsy value
+    are left out.
+    """
+    try:
+        from engine.effects.registry import get_effect_registry
+    except Exception:
+        return []
+
+    registry = get_effect_registry()
+    return [
+        (effect_name, found)
+        for effect_name in registry.list_effects()
+        if (found := registry.get(effect_name))
+    ]
+
+
+def run_benchmarks(
+    displays: list[tuple[str, Any]] | None = None,
+    effects: list[tuple[str, Any]] | None = None,
+    iterations: int = 100,
+    output_format: str = "text",
+) -> BenchmarkReport:
+    """Run all benchmarks and return a report.
+
+    Every display is benchmarked on its own first; afterwards every effect is
+    benchmarked against every display except the one named ``"websocket"``.
+    A benchmark that raises is reported on the progress stream and skipped.
+
+    Args:
+        displays: ``(name, display_class)`` pairs; discovered with
+            ``get_available_displays()`` when ``None``.
+        effects: ``(name, effect_class)`` pairs; discovered with
+            ``get_available_effects()`` when ``None``.
+        iterations: Timed iterations per benchmark.
+        output_format: Format the caller will render the report in. With
+            ``"json"`` the progress lines go to stderr so that stdout carries
+            nothing but the JSON document; otherwise they go to stdout.
+
+    Returns:
+        A ``BenchmarkReport`` holding the successful results and their
+        summary.
+    """
+    from datetime import datetime
+
+    if displays is None:
+        displays = get_available_displays()
+
+    if effects is None:
+        effects = get_available_effects()
+
+    # Progress chatter must not be mixed into machine-readable output.
+    progress = sys.stderr if output_format == "json" else sys.stdout
+
+    def log(message: str = "") -> None:
+        print(message, file=progress)
+
+    buffer = get_sample_buffer(80, 24)
+    results = []
+
+    log(f"Running benchmarks ({iterations} iterations each)...")
+    log()
+
+    for name, display_class in displays:
+        log(f"Benchmarking display: {name}")
+        try:
+            result = benchmark_display(display_class, buffer, iterations)
+            results.append(result)
+            log(f"  {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg")
+        except Exception as e:
+            log(f"  Error: {e}")
+
+    log()
+
+    for effect_name, effect_class in effects:
+        for display_name, display_class in displays:
+            # The websocket display is excluded from the effect matrix.
+            if display_name == "websocket":
+                continue
+            log(f"Benchmarking effect: {effect_name} with {display_name}")
+            try:
+                display = display_class()
+                display.init(80, 24)
+                result = benchmark_effect_with_display(
+                    effect_class, display, buffer, iterations
+                )
+                results.append(result)
+                log(f"  {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg")
+            except Exception as e:
+                log(f"  Error: {e}")
+
+    summary = generate_summary(results)
+
+    return BenchmarkReport(
+        timestamp=datetime.now().isoformat(),
+        python_version=sys.version,
+        results=results,
+        summary=summary,
+    )
+
+
+def _summarize_fps(group: list[BenchmarkResult]) -> dict[str, Any]:
+    """Return average/min/max FPS and the test count for one result group."""
+    fps_values = [float(r.fps) for r in group]
+    # Plain Python floats keep the summary JSON-serialisable; NumPy integer
+    # scalars (produced when every fps value is the int 0) are not.
+    return {
+        "avg_fps": float(np.mean(fps_values)),
+        "min_fps": float(np.min(fps_values)),
+        "max_fps": float(np.max(fps_values)),
+        "tests": len(group),
+    }
+
+
+def generate_summary(results: list[BenchmarkResult]) -> dict[str, Any]:
+    """Generate summary statistics from results.
+
+    Results are grouped by display name and, when an effect is set, by
+    effect name.
+
+    Args:
+        results: Benchmark results to aggregate; may be empty.
+
+    Returns:
+        A dict with three keys: ``"by_display"`` and ``"by_effect"`` map each
+        name to its ``avg_fps``/``min_fps``/``max_fps``/``tests`` entry, and
+        ``"overall"`` holds ``total_tests``, ``displays_tested`` and
+        ``effects_tested``. All values are built-in Python types.
+    """
+    by_display: dict[str, list[BenchmarkResult]] = {}
+    by_effect: dict[str, list[BenchmarkResult]] = {}
+
+    for r in results:
+        by_display.setdefault(r.display, []).append(r)
+        if r.effect:
+            by_effect.setdefault(r.effect, []).append(r)
+
+    return {
+        "by_display": {
+            display: _summarize_fps(group) for display, group in by_display.items()
+        },
+        "by_effect": {
+            effect: _summarize_fps(group) for effect, group in by_effect.items()
+        },
+        "overall": {
+            "total_tests": len(results),
+            "displays_tested": len(by_display),
+            "effects_tested": len(by_effect),
+        },
+    }
+
+
+def format_report_text(report: BenchmarkReport) -> str:
+    """Render ``report`` as a Markdown-style text document.
+
+    The output has a header, an overall summary, one FPS section per display,
+    one per effect (only when any effect was benchmarked) and a table with a
+    row for every individual result.
+    """
+
+    def fps_section(title, stats):
+        # Heading plus the four statistic bullets and a trailing blank line.
+        return [
+            f"### {title}",
+            f"- Avg FPS: {stats['avg_fps']:.1f}",
+            f"- Min FPS: {stats['min_fps']:.1f}",
+            f"- Max FPS: {stats['max_fps']:.1f}",
+            f"- Tests: {stats['tests']}",
+            "",
+        ]
+
+    overall = report.summary["overall"]
+    out = [
+        "# Mainline Performance Benchmark Report",
+        "",
+        f"Generated: {report.timestamp}",
+        f"Python: {report.python_version}",
+        "",
+        "## Summary",
+        "",
+        f"Total tests: {overall['total_tests']}",
+        f"Displays tested: {overall['displays_tested']}",
+        f"Effects tested: {overall['effects_tested']}",
+        "",
+        "## By Display",
+        "",
+    ]
+
+    for display_name, display_stats in report.summary["by_display"].items():
+        out.extend(fps_section(display_name, display_stats))
+
+    effect_summary = report.summary["by_effect"]
+    if effect_summary:
+        out += ["## By Effect", ""]
+        for effect_name, effect_stats in effect_summary.items():
+            out.extend(fps_section(effect_name, effect_stats))
+
+    out += [
+        "## Detailed Results",
+        "",
+        "| Display | Effect | FPS | Avg ms | StdDev ms | Min ms | Max ms |",
+        "|---------|--------|-----|--------|-----------|--------|--------|",
+    ]
+    out.extend(
+        f"| {row.display} | {row.effect or '-'} | {row.fps:.1f} | "
+        f"{row.avg_time_ms:.2f} | {row.std_dev_ms:.2f} | {row.min_ms:.2f} | "
+        f"{row.max_ms:.2f} |"
+        for row in report.results
+    )
+
+    return "\n".join(out)
+
+
+def format_report_json(report: BenchmarkReport) -> str:
+    """Serialise ``report`` to a JSON document indented by two spaces.
+
+    The document carries the timestamp, the Python version, the summary and
+    one object per result with all of the result's fields.
+    """
+    # Field order here fixes the key order of every result object.
+    result_fields = (
+        "name",
+        "display",
+        "effect",
+        "iterations",
+        "total_time_ms",
+        "avg_time_ms",
+        "std_dev_ms",
+        "min_ms",
+        "max_ms",
+        "fps",
+        "chars_processed",
+        "chars_per_sec",
+    )
+    payload = {
+        "timestamp": report.timestamp,
+        "python_version": report.python_version,
+        "summary": report.summary,
+        "results": [
+            {key: getattr(entry, key) for key in result_fields}
+            for entry in report.results
+        ],
+    }
+    return json.dumps(payload, indent=2)
+
+
+def _select_by_name(
+    requested: str, available: list[tuple[str, Any]], kind: str
+) -> list[tuple[str, Any]]:
+    """Pick the ``(name, item)`` pairs named in a comma-separated option value.
+
+    Whitespace around each name is ignored. Unknown names are skipped with a
+    warning on stderr instead of being dropped silently.
+    """
+    lookup = dict(available)
+    selected = []
+    for raw_name in requested.split(","):
+        name = raw_name.strip()
+        if not name:
+            continue
+        if name in lookup:
+            selected.append((name, lookup[name]))
+        else:
+            known = ", ".join(lookup) or "none"
+            print(
+                f"Warning: unknown {kind} '{name}' ignored (available: {known})",
+                file=sys.stderr,
+            )
+    return selected
+
+
+def main():
+    """Command-line entry point: parse options, run benchmarks, emit report.
+
+    ``--displays`` and ``--effects`` restrict the benchmark matrix to the
+    named entries, ``--iterations`` sets the number of timed iterations,
+    ``--format`` selects text or JSON, and ``--output`` writes the report to
+    a file instead of stdout.
+    """
+    parser = argparse.ArgumentParser(description="Run mainline benchmarks")
+    parser.add_argument(
+        "--displays",
+        help="Comma-separated list of displays to test (default: all)",
+    )
+    parser.add_argument(
+        "--effects",
+        help="Comma-separated list of effects to test (default: all)",
+    )
+    parser.add_argument(
+        "--iterations",
+        type=int,
+        default=100,
+        help="Number of iterations per test (default: 100)",
+    )
+    parser.add_argument(
+        "--output",
+        help="Output file path (default: stdout)",
+    )
+    parser.add_argument(
+        "--format",
+        choices=["text", "json"],
+        default="text",
+        help="Output format (default: text)",
+    )
+
+    args = parser.parse_args()
+
+    # None means "benchmark everything that is available".
+    displays = None
+    if args.displays:
+        displays = _select_by_name(
+            args.displays, get_available_displays(), "display"
+        )
+
+    effects = None
+    if args.effects:
+        effects = _select_by_name(args.effects, get_available_effects(), "effect")
+
+    report = run_benchmarks(displays, effects, args.iterations, args.format)
+
+    if args.format == "json":
+        output = format_report_json(report)
+    else:
+        output = format_report_text(report)
+
+    if args.output:
+        # Fixed encoding so the report bytes do not depend on the locale.
+        with open(args.output, "w", encoding="utf-8") as f:
+            f.write(output)
+        print(f"Report written to {args.output}")
+    else:
+        print(output)
+
+
+# Run the command-line interface only when this module is executed as a
+# script (for example via ``python -m engine.benchmark``), not when imported.
+if __name__ == "__main__":
+    main()