Mainline/engine/benchmark.py
David Gwilliam 829c4ab63d refactor: modularize display backends and add benchmark runner
- Create engine/display/ package with registry pattern
- Move displays to engine/display/backends/ (terminal, null, websocket, sixel)
- Add DisplayRegistry with auto-discovery
- Add benchmark.py for performance testing effects × displays matrix
- Add mise tasks: benchmark, benchmark-json, benchmark-report
- Update controller to use new display module
2026-03-15 22:25:28 -07:00


#!/usr/bin/env python3
"""
Benchmark runner for mainline - tests performance across effects and displays.
Usage:
    python -m engine.benchmark
    python -m engine.benchmark --output report.md
    python -m engine.benchmark --displays terminal,websocket --effects glitch,fade
"""
import argparse
import json
import sys
import time
from dataclasses import dataclass, field
from typing import Any
import numpy as np
@dataclass
class BenchmarkResult:
    """Result of a single benchmark run."""

    name: str
    display: str
    effect: str | None
    iterations: int
    total_time_ms: float
    avg_time_ms: float
    std_dev_ms: float
    min_ms: float
    max_ms: float
    fps: float
    chars_processed: int
    chars_per_sec: float
@dataclass
class BenchmarkReport:
    """Complete benchmark report."""

    timestamp: str
    python_version: str
    results: list[BenchmarkResult] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
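# Illustrative relationship between the raw timings and the derived fields above
# (hypothetical numbers): a display averaging 2.5 ms per frame reports
# fps = 1000 / 2.5 = 400, and chars_per_sec is chars_processed divided by
# total_time_ms / 1000.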
def get_sample_buffer(width: int = 80, height: int = 24) -> list[str]:
    """Generate a sample buffer for benchmarking."""
    lines = []
    for i in range(height):
        line = f"\x1b[32mLine {i}\x1b[0m " + "A" * (width - 10)
        lines.append(line)
    return lines
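# NOTE: the benchmarks assume every display backend exposes the minimal protocol
# used below: init(width, height), show(buffer), and cleanup().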
def benchmark_display(
    display_class, buffer: list[str], iterations: int = 100
) -> BenchmarkResult:
    """Benchmark a single display."""
    display = display_class()
    display.init(80, 24)
    times = []
    chars = sum(len(line) for line in buffer)
    for _ in range(iterations):
        t0 = time.perf_counter()
        display.show(buffer)
        elapsed = (time.perf_counter() - t0) * 1000
        times.append(elapsed)
    display.cleanup()
    times_arr = np.array(times)
    return BenchmarkResult(
        name=f"display_{display_class.__name__}",
        display=display_class.__name__,
        effect=None,
        iterations=iterations,
        total_time_ms=sum(times),
        avg_time_ms=np.mean(times_arr),
        std_dev_ms=np.std(times_arr),
        min_ms=np.min(times_arr),
        max_ms=np.max(times_arr),
        fps=1000.0 / np.mean(times_arr) if np.mean(times_arr) > 0 else 0,
        chars_processed=chars * iterations,
        chars_per_sec=(chars * iterations) / (sum(times) / 1000)
        if sum(times) > 0
        else 0,
    )
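# Only display.show() falls inside the timed section below; effect.process() runs
# before the timer starts, so this measures rendering cost for pre-processed frames
# rather than the effect's own CPU cost.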
def benchmark_effect_with_display(
    effect_class, display, buffer: list[str], iterations: int = 100
) -> BenchmarkResult:
    """Benchmark an effect with a display."""
    effect = effect_class()
    effect.configure(enabled=True, intensity=1.0)
    times = []
    chars = sum(len(line) for line in buffer)
    for _ in range(iterations):
        processed = effect.process(buffer)
        t0 = time.perf_counter()
        display.show(processed)
        elapsed = (time.perf_counter() - t0) * 1000
        times.append(elapsed)
    display.cleanup()
    times_arr = np.array(times)
    return BenchmarkResult(
        name=f"effect_{effect_class.__name__}_with_{display.__class__.__name__}",
        display=display.__class__.__name__,
        effect=effect_class.__name__,
        iterations=iterations,
        total_time_ms=sum(times),
        avg_time_ms=np.mean(times_arr),
        std_dev_ms=np.std(times_arr),
        min_ms=np.min(times_arr),
        max_ms=np.max(times_arr),
        fps=1000.0 / np.mean(times_arr) if np.mean(times_arr) > 0 else 0,
        chars_processed=chars * iterations,
        chars_per_sec=(chars * iterations) / (sum(times) / 1000)
        if sum(times) > 0
        else 0,
    )
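# Optional backends are imported defensively: the websocket and sixel displays may
# carry extra dependencies or terminal requirements, and an import failure simply
# drops them from the benchmark matrix.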
def get_available_displays():
    """Get available display classes."""
    from engine.display import (
        DisplayRegistry,
        NullDisplay,
        TerminalDisplay,
    )
    DisplayRegistry.initialize()
    displays = [
        ("null", NullDisplay),
        ("terminal", TerminalDisplay),
    ]
    try:
        from engine.display.backends.websocket import WebSocketDisplay
        displays.append(("websocket", WebSocketDisplay))
    except Exception:
        pass
    try:
        # Import inside the try so a missing sixel backend is skipped, not fatal.
        from engine.display.backends.sixel import SixelDisplay
        displays.append(("sixel", SixelDisplay))
    except Exception:
        pass
    return displays
def get_available_effects():
    """Get available effect classes."""
    try:
        from engine.effects.registry import get_effect_registry
    except Exception:
        return []
    effects = []
    registry = get_effect_registry()
    for name in registry.list_effects():
        effect = registry.get(name)
        if effect:
            effects.append((name, effect))
    return effects
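# Sketch of programmatic use (assumes NullDisplay is exported from engine.display,
# as in get_available_displays above):
#
#     from engine.display import NullDisplay
#     report = run_benchmarks(displays=[("null", NullDisplay)], effects=[], iterations=50)
#     print(format_report_text(report))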
def run_benchmarks(
    displays: list[tuple[str, Any]] | None = None,
    effects: list[tuple[str, Any]] | None = None,
    iterations: int = 100,
    output_format: str = "text",
) -> BenchmarkReport:
    """Run all benchmarks and return report."""
    from datetime import datetime
    if displays is None:
        displays = get_available_displays()
    if effects is None:
        effects = get_available_effects()
    buffer = get_sample_buffer(80, 24)
    results = []
    print(f"Running benchmarks ({iterations} iterations each)...")
    print()
    for name, display_class in displays:
        print(f"Benchmarking display: {name}")
        try:
            result = benchmark_display(display_class, buffer, iterations)
            results.append(result)
            print(f" {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg")
        except Exception as e:
            print(f" Error: {e}")
    print()
    for effect_name, effect_class in effects:
        for display_name, display_class in displays:
            if display_name == "websocket":
                continue
            print(f"Benchmarking effect: {effect_name} with {display_name}")
            try:
                display = display_class()
                display.init(80, 24)
                result = benchmark_effect_with_display(
                    effect_class, display, buffer, iterations
                )
                results.append(result)
                print(f" {result.fps:.1f} FPS, {result.avg_time_ms:.2f}ms avg")
            except Exception as e:
                print(f" Error: {e}")
    summary = generate_summary(results)
    return BenchmarkReport(
        timestamp=datetime.now().isoformat(),
        python_version=sys.version,
        results=results,
        summary=summary,
    )
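# The summary dict produced below has the shape:
#     {"by_display": {<display name>: {"avg_fps", "min_fps", "max_fps", "tests"}},
#      "by_effect": {<effect name>: {...same keys...}},
#      "overall": {"total_tests", "displays_tested", "effects_tested"}}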
def generate_summary(results: list[BenchmarkResult]) -> dict[str, Any]:
    """Generate summary statistics from results."""
    by_display: dict[str, list[BenchmarkResult]] = {}
    by_effect: dict[str, list[BenchmarkResult]] = {}
    for r in results:
        if r.display not in by_display:
            by_display[r.display] = []
        by_display[r.display].append(r)
        if r.effect:
            if r.effect not in by_effect:
                by_effect[r.effect] = []
            by_effect[r.effect].append(r)
    summary = {
        "by_display": {},
        "by_effect": {},
        "overall": {
            "total_tests": len(results),
            "displays_tested": len(by_display),
            "effects_tested": len(by_effect),
        },
    }
    for display, res in by_display.items():
        fps_values = [r.fps for r in res]
        summary["by_display"][display] = {
            "avg_fps": np.mean(fps_values),
            "min_fps": np.min(fps_values),
            "max_fps": np.max(fps_values),
            "tests": len(res),
        }
    for effect, res in by_effect.items():
        fps_values = [r.fps for r in res]
        summary["by_effect"][effect] = {
            "avg_fps": np.mean(fps_values),
            "min_fps": np.min(fps_values),
            "max_fps": np.max(fps_values),
            "tests": len(res),
        }
    return summary
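# Despite the "text" label, this formatter emits Markdown (headings plus a pipe
# table), so its output can be written straight to a .md file, as in the module
# docstring's --output report.md example.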
def format_report_text(report: BenchmarkReport) -> str:
    """Format report as human-readable text."""
    lines = [
        "# Mainline Performance Benchmark Report",
        "",
        f"Generated: {report.timestamp}",
        f"Python: {report.python_version}",
        "",
        "## Summary",
        "",
        f"Total tests: {report.summary['overall']['total_tests']}",
        f"Displays tested: {report.summary['overall']['displays_tested']}",
        f"Effects tested: {report.summary['overall']['effects_tested']}",
        "",
        "## By Display",
        "",
    ]
    for display, stats in report.summary["by_display"].items():
        lines.append(f"### {display}")
        lines.append(f"- Avg FPS: {stats['avg_fps']:.1f}")
        lines.append(f"- Min FPS: {stats['min_fps']:.1f}")
        lines.append(f"- Max FPS: {stats['max_fps']:.1f}")
        lines.append(f"- Tests: {stats['tests']}")
        lines.append("")
    if report.summary["by_effect"]:
        lines.append("## By Effect")
        lines.append("")
        for effect, stats in report.summary["by_effect"].items():
            lines.append(f"### {effect}")
            lines.append(f"- Avg FPS: {stats['avg_fps']:.1f}")
            lines.append(f"- Min FPS: {stats['min_fps']:.1f}")
            lines.append(f"- Max FPS: {stats['max_fps']:.1f}")
            lines.append(f"- Tests: {stats['tests']}")
            lines.append("")
    lines.append("## Detailed Results")
    lines.append("")
    lines.append("| Display | Effect | FPS | Avg ms | StdDev ms | Min ms | Max ms |")
    lines.append("|---------|--------|-----|--------|-----------|--------|--------|")
    for r in report.results:
        effect_col = r.effect if r.effect else "-"
        lines.append(
            f"| {r.display} | {effect_col} | {r.fps:.1f} | {r.avg_time_ms:.2f} | "
            f"{r.std_dev_ms:.2f} | {r.min_ms:.2f} | {r.max_ms:.2f} |"
        )
    return "\n".join(lines)
def format_report_json(report: BenchmarkReport) -> str:
    """Format report as JSON."""
    data = {
        "timestamp": report.timestamp,
        "python_version": report.python_version,
        "summary": report.summary,
        "results": [
            {
                "name": r.name,
                "display": r.display,
                "effect": r.effect,
                "iterations": r.iterations,
                "total_time_ms": r.total_time_ms,
                "avg_time_ms": r.avg_time_ms,
                "std_dev_ms": r.std_dev_ms,
                "min_ms": r.min_ms,
                "max_ms": r.max_ms,
                "fps": r.fps,
                "chars_processed": r.chars_processed,
                "chars_per_sec": r.chars_per_sec,
            }
            for r in report.results
        ],
    }
    return json.dumps(data, indent=2)
def main():
    parser = argparse.ArgumentParser(description="Run mainline benchmarks")
    parser.add_argument(
        "--displays",
        help="Comma-separated list of displays to test (default: all)",
    )
    parser.add_argument(
        "--effects",
        help="Comma-separated list of effects to test (default: all)",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=100,
        help="Number of iterations per test (default: 100)",
    )
    parser.add_argument(
        "--output",
        help="Output file path (default: stdout)",
    )
    parser.add_argument(
        "--format",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    args = parser.parse_args()
    displays = None
    if args.displays:
        display_map = dict(get_available_displays())
        displays = [
            (name, display_map[name])
            for name in args.displays.split(",")
            if name in display_map
        ]
    effects = None
    if args.effects:
        effect_map = dict(get_available_effects())
        effects = [
            (name, effect_map[name])
            for name in args.effects.split(",")
            if name in effect_map
        ]
    report = run_benchmarks(displays, effects, args.iterations, args.format)
    if args.format == "json":
        output = format_report_json(report)
    else:
        output = format_report_text(report)
    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
        print(f"Report written to {args.output}")
    else:
        print(output)
if __name__ == "__main__":
main()