feat(comparison): Add upstream vs sideline comparison framework

- Add comparison_presets.toml with 20+ preset configurations
- Add comparison_capture.py for frame capture and comparison
- Add run_comparison.py for running comparisons
- Add test_comparison_framework.py with comprehensive tests
- Add capture_upstream_comparison.py for upstream frame capture
- Add tomli to dev dependencies for TOML parsing

The framework supports:
- Multiple preset configurations (basic, effects, camera, source, viewport)
- Frame-by-frame comparison with detailed diff analysis
- Performance metrics comparison
- HTML report generation
- Integration with sideline branch for regression testing
This commit is contained in:
2026-03-21 16:06:23 -07:00
parent b058160e9d
commit 6c06f12c5a
6 changed files with 1484 additions and 0 deletions

243
tests/run_comparison.py Normal file
View File

@@ -0,0 +1,243 @@
"""Main comparison runner for upstream vs sideline testing.
This script runs comparisons between upstream and sideline implementations
using multiple presets and generates HTML reports.
"""
import argparse
import json
import sys
from pathlib import Path
from tests.comparison_capture import (
capture_frames,
compare_captured_outputs,
generate_html_report,
)
def load_comparison_presets() -> list[str]:
    """Load the list of comparison preset names from the config file.

    Reads ``tests/comparison_presets.toml`` and returns the names of the
    tables nested under the top-level ``presets`` table.

    Returns:
        List of preset names.

    Raises:
        FileNotFoundError: If the presets config file does not exist.
    """
    # Prefer the stdlib TOML parser (Python 3.11+); fall back to the
    # API-compatible third-party ``tomli`` on older interpreters.
    try:
        import tomllib as toml_parser
    except ImportError:
        import tomli as toml_parser

    config_file = Path("tests/comparison_presets.toml")
    if not config_file.exists():
        raise FileNotFoundError(f"Comparison presets not found: {config_file}")
    with open(config_file, "rb") as f:
        config = toml_parser.load(f)
    presets = list(config.get("presets", {}).keys())
    # Strip a leading "presets." prefix if present. ``removeprefix`` only
    # touches the start of the name — ``str.replace`` would also mangle a
    # preset name that merely *contains* the substring "presets.".
    return [p.removeprefix("presets.") for p in presets]
def run_comparison_for_preset(
    preset_name: str,
    sideline_only: bool = False,
    upstream_file: Path | None = None,
) -> dict:
    """Run comparison for a single preset.

    Args:
        preset_name: Name of preset to test.
        sideline_only: If True, only capture sideline frames and skip the
            upstream comparison entirely.
        upstream_file: Path to a previously captured upstream output; if
            None, look for the conventional per-preset upstream file.

    Returns:
        Comparison result dict. On success it is the dict produced by
        ``compare_captured_outputs`` with ``"status": "success"`` added;
        otherwise a small dict describing why the comparison was skipped
        or failed.
    """
    print(f" Running preset: {preset_name}")

    # Capture sideline frames. The return value was previously bound to an
    # unused local; only the call's side effect matters here.
    # NOTE(review): assumes capture_frames writes the per-preset sideline
    # file itself — confirm against comparison_capture.py.
    capture_frames(preset_name, frame_count=30)
    sideline_file = Path(f"tests/comparison_output/{preset_name}_sideline.json")

    if sideline_only:
        return {
            "preset": preset_name,
            "status": "sideline_only",
            "sideline_file": str(sideline_file),
        }

    # Use the explicitly provided upstream file, or fall back to the
    # conventional location next to the sideline capture.
    upstream_path = upstream_file or Path(
        f"tests/comparison_output/{preset_name}_upstream.json"
    )
    if not upstream_path.exists():
        print(f" Warning: Upstream file not found: {upstream_path}")
        return {
            "preset": preset_name,
            "status": "missing_upstream",
            "sideline_file": str(sideline_file),
            "upstream_file": str(upstream_path),
        }

    # Compare outputs; any failure is converted into an "error" result so a
    # single bad preset does not abort the whole run.
    try:
        comparison_result = compare_captured_outputs(
            sideline_file=sideline_file,
            upstream_file=upstream_path,
        )
    except Exception as e:
        print(f" Error comparing outputs: {e}")
        return {
            "preset": preset_name,
            "status": "error",
            "error": str(e),
            "sideline_file": str(sideline_file),
            "upstream_file": str(upstream_path),
        }
    comparison_result["status"] = "success"
    return comparison_result
def _build_arg_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the comparison runner."""
    parser = argparse.ArgumentParser(
        description="Run comparison tests between upstream and sideline implementations"
    )
    parser.add_argument(
        "--preset",
        "-p",
        help="Run specific preset (can be specified multiple times)",
        action="append",
        dest="presets",
    )
    parser.add_argument(
        "--all",
        "-a",
        help="Run all comparison presets",
        action="store_true",
    )
    parser.add_argument(
        "--sideline-only",
        "-s",
        help="Only capture sideline frames (no comparison)",
        action="store_true",
    )
    parser.add_argument(
        "--upstream-file",
        "-u",
        help="Path to upstream captured output file",
        type=Path,
    )
    parser.add_argument(
        "--output-dir",
        "-o",
        help="Output directory for captured frames and reports",
        type=Path,
        default=Path("tests/comparison_output"),
    )
    parser.add_argument(
        "--no-report",
        help="Skip HTML report generation",
        action="store_true",
    )
    return parser


def _print_summary(results: list[dict]) -> None:
    """Print per-status counts and, when applicable, the average match rate."""
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    status_counts: dict[str, int] = {}
    for result in results:
        status = result.get("status", "unknown")
        status_counts[status] = status_counts.get(status, 0) + 1
    for status, count in sorted(status_counts.items()):
        print(f" {status}: {count}")
    if "success" in status_counts:
        successful_results = [r for r in results if r.get("status") == "success"]
        avg_match = sum(
            r["stats"]["match_percentage"] for r in successful_results
        ) / len(successful_results)
        print(f"\n Average match rate: {avg_match:.1f}%")


def main():
    """Main entry point for comparison runner.

    Parses CLI arguments, runs the requested preset comparisons, optionally
    generates an HTML report plus a JSON summary, prints a status summary,
    and exits non-zero if any preset errored or failed.
    """
    # Stdlib import done locally; previously reached via an
    # ``__import__("datetime")`` hack at the call site.
    from datetime import datetime

    args = _build_arg_parser().parse_args()

    # Determine which presets to run.
    if args.presets:
        presets_to_run = args.presets
    elif args.all:
        presets_to_run = load_comparison_presets()
    else:
        print("Error: Either --preset or --all must be specified")
        print(f"Available presets: {', '.join(load_comparison_presets())}")
        sys.exit(1)

    print(f"Running comparison for {len(presets_to_run)} preset(s)")
    print(f"Output directory: {args.output_dir}")
    print()

    # Run comparisons; a failure in one preset is recorded and the run
    # continues with the remaining presets.
    results = []
    for preset_name in presets_to_run:
        try:
            result = run_comparison_for_preset(
                preset_name,
                sideline_only=args.sideline_only,
                upstream_file=args.upstream_file,
            )
            results.append(result)
            if result["status"] == "success":
                match_pct = result["stats"]["match_percentage"]
                print(f" ✓ Match: {match_pct:.1f}%")
            elif result["status"] == "missing_upstream":
                print(" ⚠ Missing upstream file")
            elif result["status"] == "error":
                print(f" ✗ Error: {result['error']}")
            else:
                print(" ✓ Captured sideline only")
        except Exception as e:
            print(f" ✗ Failed: {e}")
            results.append(
                {
                    "preset": preset_name,
                    "status": "failed",
                    "error": str(e),
                }
            )

    # Generate HTML report and JSON summary for successful comparisons.
    if not args.no_report and not args.sideline_only:
        successful_results = [r for r in results if r.get("status") == "success"]
        if successful_results:
            print("\nGenerating HTML report...")
            report_file = generate_html_report(successful_results, args.output_dir)
            print(f" Report saved to: {report_file}")
            # Ensure the output directory exists before writing the summary —
            # the user may have pointed --output-dir at a fresh location.
            args.output_dir.mkdir(parents=True, exist_ok=True)
            summary_file = args.output_dir / "comparison_summary.json"
            with open(summary_file, "w") as f:
                json.dump(
                    {
                        "timestamp": datetime.now().isoformat(),
                        "presets_tested": [r["preset"] for r in results],
                        "results": results,
                    },
                    f,
                    indent=2,
                )
            print(f" Summary saved to: {summary_file}")
        else:
            print("\nNote: No successful comparisons to report.")
            print(f" Capture files saved in {args.output_dir}")
            print(" Run comparison when upstream files are available.")

    _print_summary(results)

    # Exit with an error code if any preset errored or failed outright.
    if any(r.get("status") in ["error", "failed"] for r in results):
        sys.exit(1)
# Allow this module to be executed directly as a script.
if __name__ == "__main__":
    main()