Mainline/engine/benchmark.py
David Gwilliam 829c4ab63d refactor: modularize display backends and add benchmark runner
- Create engine/display/ package with registry pattern
- Move displays to engine/display/backends/ (terminal, null, websocket, sixel)
- Add DisplayRegistry with auto-discovery
- Add benchmark.py for performance testing effects × displays matrix
- Add mise tasks: benchmark, benchmark-json, benchmark-report
- Update controller to use new display module
2026-03-15 22:25:28 -07:00


#!/usr/bin/env python3
"""
Benchmark runner for mainline - tests performance across effects and displays.
Usage:
    python -m engine.benchmark
    python -m engine.benchmark --output report.md
    python -m engine.benchmark --displays terminal,websocket --effects glitch,fade
"""
import argparse
import json
import sys
import time
from dataclasses import dataclass, field
from typing import Any
import numpy as np
@dataclass
class BenchmarkResult:
    """Result of a single benchmark run."""

    name: str
    display: str
    effect: str | None
    iterations: int
    total_time_ms: float
    avg_time_ms: float
    std_dev_ms: float
    min_ms: float
    max_ms: float
    fps: float
    chars_processed: int
    chars_per_sec: float
@dataclass
class BenchmarkReport:
    """Complete benchmark report."""

    timestamp: str
    python_version: str
    results: list[BenchmarkResult] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
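# Illustrative relationship between the raw timings and the derived fields above
# (hypothetical numbers): a display averaging 2.5 ms per frame reports
# fps = 1000 / 2.5 = 400, and chars_per_sec is chars_processed divided by
# total_time_ms / 1000.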
def get_sample_buffer(width: int = 80, height: int = 24) -> list[str]:
    """Generate a sample buffer for benchmarking."""
    lines = []
    for i in range(height):
        line = f"\x1b[32mLine {i}\x1b[0m " + "A" * (width - 10)
        lines.append(line)
    return lines
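# NOTE: the benchmarks assume every display backend exposes the minimal protocol
# used below: init(width, height), show(buffer), and cleanup().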
def benchmark_display(
    display_class, buffer: list[str], iterations: int = 100
) -> BenchmarkResult:
    """Benchmark a single display."""
    display = display_class()
    display.init(80, 24)
    times = []
    chars = sum(len(line) for line in buffer)
    for _ in range(iterations):
        t0 = time.perf_counter()
        display.show(buffer)
        elapsed = (time.perf_counter() - t0) * 1000
        times.append(elapsed)
    display.cleanup()
    times_arr = np.array(times)
    return BenchmarkResult(
        name=f"display_{display_class.__name__}",
        display=display_class.__name__,
        effect=None,
        iterations=iterations,
        total_time_ms=sum(times),
        avg_time_ms=np.mean(times_arr),
        std_dev_ms=np.std(times_arr),
        min_ms=np.min(times_arr),
        max_ms=np.max(times_arr),
        fps=1000.0 / np.mean(times_arr) if np.mean(times_arr) > 0 else 0,
        chars_processed=chars * iterations,
        chars_per_sec=(chars * iterations) / (sum(times) / 1000)
        if sum(times) > 0
        else 0,
    )
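# Only display.show() falls inside the timed section below; effect.process() runs
# before the timer starts, so this measures rendering cost for pre-processed frames
# rather than the effect's own CPU cost.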
def benchmark_effect_with_display(
    effect_class, display, buffer: list[str], iterations: int = 100
) -> BenchmarkResult:
    """Benchmark an effect with a display."""
    effect = effect_class()
    effect.configure(enabled=True, intensity=1.0)
    times = []
    chars = sum(len(line) for line in buffer)
    for _ in range(iterations):
        processed = effect.process(buffer)
        t0 = time.perf_counter()
        display.show(processed)
        elapsed = (time.perf_counter() - t0) * 1000
        times.append(elapsed)
    display.cleanup()
    times_arr = np.array(times)
    return BenchmarkResult(
        name=f"effect_{effect_class.__name__}_with_{display.__class__.__name__}",
        display=display.__class__.__name__,
        effect=effect_class.__name__,
        iterations=iterations,
        total_time_ms=sum(times),
        avg_time_ms=np.mean(times_arr),
        std_dev_ms=np.std(times_arr),
        min_ms=np.min(times_arr),
        max_ms=np.max(times_arr),
        fps=1000.0 / np.mean(times_arr) if np.mean(times_arr) > 0 else 0,
        chars_processed=chars * iterations,
        chars_per_sec=(chars * iterations) / (sum(times) / 1000)
        if sum(times) > 0
        else 0,
    )
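# Optional backends are imported defensively: the websocket and sixel displays may
# carry extra dependencies or terminal requirements, and an import failure simply
# drops them from the benchmark matrix.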
def get_available_displays():
    """Get available display classes."""
    from engine.display import (
        DisplayRegistry,
        NullDisplay,
        TerminalDisplay,
    )
    DisplayRegistry.initialize()
    displays = [
        ("null", NullDisplay),
        ("terminal", TerminalDisplay),
    ]
    try:
        from engine.display.backends.websocket import WebSocketDisplay
        displays.append(("websocket", WebSocketDisplay))
    except Exception:
        pass
    try:
        # Import inside the try so a missing sixel backend is skipped, not fatal.
        from engine.display.backends.sixel import SixelDisplay
        displays.append(("sixel", SixelDisplay))
    except Exception:
        pass
    return displays
def get_available_effects():
    """Get available effect classes."""
    try:
        from engine.effects.registry import get_effect_registry
    except Exception:
        return []
    effects = []
    registry = get_effect_registry()
    for name in registry.list_effects():
        effect = registry.get(name)
        if effect:
            effects.append((name, effect))
    return effects
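# Sketch of programmatic use (assumes NullDisplay is exported from engine.display,
# as in get_available_displays above):
#
#     from engine.display import NullDisplay
#     report = run_benchmarks(displays=[("null", NullDisplay)], effects=[], iterations=50)
#     print(format_report_text(report))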
def run_benchmarks(
    displays: list[tuple[str, Any]] | None = None,
    effects: list[tuple[str, Any]] | None = None,
    iterations: int = 100,
    output_format: str = "text",
) -> BenchmarkReport:
    """Run all benchmarks and return report."""
    from datetime import datetime
    if displays is None:
        displays = get_available_displays()
    if effects is None:
        effects = get_available_effects()
    buffer = get_sample_buffer(80, 24)
    results = []
    print(f"Running benchmarks ({iterations} iterations each)...")
    print()
    for name, display_class in displays:
        print(f"Benchmarking display: {name}")
        try:
            result = benchmark_display(display_class, buffer, iterations)
            results.append(result)
            print(f" {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg")
        except Exception as e:
            print(f" Error: {e}")
    print()
    for effect_name, effect_class in effects:
        for display_name, display_class in displays:
            if display_name == "websocket":
                continue
            print(f"Benchmarking effect: {effect_name} with {display_name}")
            try:
                display = display_class()
                display.init(80, 24)
                result = benchmark_effect_with_display(
                    effect_class, display, buffer, iterations
                )
                results.append(result)
                print(f" {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg")
            except Exception as e:
                print(f" Error: {e}")
    summary = generate_summary(results)
    return BenchmarkReport(
        timestamp=datetime.now().isoformat(),
        python_version=sys.version,
        results=results,
        summary=summary,
    )
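# The summary dict produced below has the shape:
#     {"by_display": {<display name>: {"avg_fps", "min_fps", "max_fps", "tests"}},
#      "by_effect": {<effect name>: {...same keys...}},
#      "overall": {"total_tests", "displays_tested", "effects_tested"}}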
def generate_summary(results: list[BenchmarkResult]) -> dict[str, Any]:
    """Generate summary statistics from results."""
    by_display: dict[str, list[BenchmarkResult]] = {}
    by_effect: dict[str, list[BenchmarkResult]] = {}
    for r in results:
        if r.display not in by_display:
            by_display[r.display] = []
        by_display[r.display].append(r)
        if r.effect:
            if r.effect not in by_effect:
                by_effect[r.effect] = []
            by_effect[r.effect].append(r)
    summary = {
        "by_display": {},
        "by_effect": {},
        "overall": {
            "total_tests": len(results),
            "displays_tested": len(by_display),
            "effects_tested": len(by_effect),
        },
    }
    for display, res in by_display.items():
        fps_values = [r.fps for r in res]
        summary["by_display"][display] = {
            "avg_fps": np.mean(fps_values),
            "min_fps": np.min(fps_values),
            "max_fps": np.max(fps_values),
            "tests": len(res),
        }
    for effect, res in by_effect.items():
        fps_values = [r.fps for r in res]
        summary["by_effect"][effect] = {
            "avg_fps": np.mean(fps_values),
            "min_fps": np.min(fps_values),
            "max_fps": np.max(fps_values),
            "tests": len(res),
        }
    return summary
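# Despite the "text" label, this formatter emits Markdown (headings plus a pipe
# table), so its output can be written straight to a .md file, as in the module
# docstring's --output report.md example.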
def format_report_text(report: BenchmarkReport) -> str:
    """Format report as human-readable text."""
    lines = [
        "# Mainline Performance Benchmark Report",
        "",
        f"Generated: {report.timestamp}",
        f"Python: {report.python_version}",
        "",
        "## Summary",
        "",
        f"Total tests: {report.summary['overall']['total_tests']}",
        f"Displays tested: {report.summary['overall']['displays_tested']}",
        f"Effects tested: {report.summary['overall']['effects_tested']}",
        "",
        "## By Display",
        "",
    ]
    for display, stats in report.summary["by_display"].items():
        lines.append(f"### {display}")
        lines.append(f"- Avg FPS: {stats['avg_fps']:.1f}")
        lines.append(f"- Min FPS: {stats['min_fps']:.1f}")
        lines.append(f"- Max FPS: {stats['max_fps']:.1f}")
        lines.append(f"- Tests: {stats['tests']}")
        lines.append("")
    if report.summary["by_effect"]:
        lines.append("## By Effect")
        lines.append("")
        for effect, stats in report.summary["by_effect"].items():
            lines.append(f"### {effect}")
            lines.append(f"- Avg FPS: {stats['avg_fps']:.1f}")
            lines.append(f"- Min FPS: {stats['min_fps']:.1f}")
            lines.append(f"- Max FPS: {stats['max_fps']:.1f}")
            lines.append(f"- Tests: {stats['tests']}")
            lines.append("")
    lines.append("## Detailed Results")
    lines.append("")
    lines.append("| Display | Effect | FPS | Avg ms | StdDev ms | Min ms | Max ms |")
    lines.append("|---------|--------|-----|--------|-----------|--------|--------|")
    for r in report.results:
        effect_col = r.effect if r.effect else "-"
        lines.append(
            f"| {r.display} | {effect_col} | {r.fps:.1f} | {r.avg_time_ms:.2f} | "
            f"{r.std_dev_ms:.2f} | {r.min_ms:.2f} | {r.max_ms:.2f} |"
        )
    return "\n".join(lines)
def format_report_json(report: BenchmarkReport) -> str:
    """Format report as JSON."""
    data = {
        "timestamp": report.timestamp,
        "python_version": report.python_version,
        "summary": report.summary,
        "results": [
            {
                "name": r.name,
                "display": r.display,
                "effect": r.effect,
                "iterations": r.iterations,
                "total_time_ms": r.total_time_ms,
                "avg_time_ms": r.avg_time_ms,
                "std_dev_ms": r.std_dev_ms,
                "min_ms": r.min_ms,
                "max_ms": r.max_ms,
                "fps": r.fps,
                "chars_processed": r.chars_processed,
                "chars_per_sec": r.chars_per_sec,
            }
            for r in report.results
        ],
    }
    return json.dumps(data, indent=2)
def main():
    parser = argparse.ArgumentParser(description="Run mainline benchmarks")
    parser.add_argument(
        "--displays",
        help="Comma-separated list of displays to test (default: all)",
    )
    parser.add_argument(
        "--effects",
        help="Comma-separated list of effects to test (default: all)",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=100,
        help="Number of iterations per test (default: 100)",
    )
    parser.add_argument(
        "--output",
        help="Output file path (default: stdout)",
    )
    parser.add_argument(
        "--format",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    args = parser.parse_args()
    displays = None
    if args.displays:
        display_map = dict(get_available_displays())
        displays = [
            (name, display_map[name])
            for name in args.displays.split(",")
            if name in display_map
        ]
    effects = None
    if args.effects:
        effect_map = dict(get_available_effects())
        effects = [
            (name, effect_map[name])
            for name in args.effects.split(",")
            if name in effect_map
        ]
    report = run_benchmarks(displays, effects, args.iterations, args.format)
    if args.format == "json":
        output = format_report_json(report)
    else:
        output = format_report_text(report)
    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
        print(f"Report written to {args.output}")
    else:
        print(output)
if __name__ == "__main__":
main()