feat(comparison): Add upstream vs sideline comparison framework
- Add comparison_presets.toml with 20+ preset configurations
- Add comparison_capture.py for frame capture and comparison
- Add run_comparison.py for running comparisons
- Add test_comparison_framework.py with comprehensive tests
- Add capture_upstream_comparison.py for upstream frame capture
- Add tomli to dev dependencies for TOML parsing

The framework supports:
- Multiple preset configurations (basic, effects, camera, source, viewport)
- Frame-by-frame comparison with detailed diff analysis
- Performance metrics comparison
- HTML report generation
- Integration with sideline branch for regression testing
This commit is contained in:
243
tests/run_comparison.py
Normal file
243
tests/run_comparison.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""Main comparison runner for upstream vs sideline testing.
|
||||
|
||||
This script runs comparisons between upstream and sideline implementations
|
||||
using multiple presets and generates HTML reports.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from tests.comparison_capture import (
|
||||
capture_frames,
|
||||
compare_captured_outputs,
|
||||
generate_html_report,
|
||||
)
|
||||
|
||||
|
||||
def load_comparison_presets() -> list[str]:
    """Load the list of comparison preset names from the config file.

    Reads ``tests/comparison_presets.toml`` (relative to the current
    working directory) and returns the keys of its ``[presets]`` table.

    Returns:
        List of preset names, with any literal ``presets.`` prefix removed.

    Raises:
        FileNotFoundError: If the presets config file does not exist.
    """
    # Prefer the stdlib TOML parser (Python 3.11+); fall back to the
    # tomli backport, which has an identical API.
    try:
        import tomllib
    except ModuleNotFoundError:  # Python < 3.11
        import tomli as tomllib

    config_file = Path("tests/comparison_presets.toml")
    if not config_file.exists():
        raise FileNotFoundError(f"Comparison presets not found: {config_file}")

    with open(config_file, "rb") as f:
        config = tomllib.load(f)

    # removeprefix only strips a leading "presets."; the previous
    # str.replace would mangle a preset name containing that substring.
    presets = config.get("presets", {})
    return [name.removeprefix("presets.") for name in presets]
|
||||
|
||||
|
||||
def run_comparison_for_preset(
    preset_name: str,
    sideline_only: bool = False,
    upstream_file: Path | None = None,
) -> dict:
    """Run the sideline capture and (optionally) comparison for one preset.

    Args:
        preset_name: Name of preset to test
        sideline_only: If True, only capture sideline frames
        upstream_file: Path to upstream captured output (if not None, use this instead of capturing)

    Returns:
        Comparison result dict; its "status" key is one of "success",
        "sideline_only", "missing_upstream", or "error".
    """
    print(f" Running preset: {preset_name}")

    # Capture sideline frames. capture_frames persists its output on disk,
    # so the returned data is not needed here (previously bound to an
    # unused local).
    capture_frames(preset_name, frame_count=30)
    sideline_file = Path(f"tests/comparison_output/{preset_name}_sideline.json")

    if sideline_only:
        return {
            "preset": preset_name,
            "status": "sideline_only",
            "sideline_file": str(sideline_file),
        }

    # Use provided upstream file or fall back to the conventional
    # per-preset location.
    if upstream_file:
        upstream_path = upstream_file
    else:
        upstream_path = Path(f"tests/comparison_output/{preset_name}_upstream.json")

    if not upstream_path.exists():
        print(f" Warning: Upstream file not found: {upstream_path}")
        return {
            "preset": preset_name,
            "status": "missing_upstream",
            "sideline_file": str(sideline_file),
            "upstream_file": str(upstream_path),
        }

    # Compare outputs; report (rather than propagate) comparison failures
    # so the caller can keep iterating over remaining presets.
    try:
        comparison_result = compare_captured_outputs(
            sideline_file=sideline_file,
            upstream_file=upstream_path,
        )
        comparison_result["status"] = "success"
        return comparison_result
    except Exception as e:
        print(f" Error comparing outputs: {e}")
        return {
            "preset": preset_name,
            "status": "error",
            "error": str(e),
            "sideline_file": str(sideline_file),
            "upstream_file": str(upstream_path),
        }
|
||||
|
||||
|
||||
def main():
    """Main entry point for the comparison runner CLI.

    Parses command-line options, runs the selected presets, optionally
    writes an HTML report plus a JSON summary, prints a status summary,
    and exits non-zero if any preset errored or failed.
    """
    # Proper import instead of the previous __import__("datetime") hack.
    from datetime import datetime

    parser = argparse.ArgumentParser(
        description="Run comparison tests between upstream and sideline implementations"
    )
    parser.add_argument(
        "--preset",
        "-p",
        help="Run specific preset (can be specified multiple times)",
        action="append",
        dest="presets",
    )
    parser.add_argument(
        "--all",
        "-a",
        help="Run all comparison presets",
        action="store_true",
    )
    parser.add_argument(
        "--sideline-only",
        "-s",
        help="Only capture sideline frames (no comparison)",
        action="store_true",
    )
    parser.add_argument(
        "--upstream-file",
        "-u",
        help="Path to upstream captured output file",
        type=Path,
    )
    parser.add_argument(
        "--output-dir",
        "-o",
        help="Output directory for captured frames and reports",
        type=Path,
        default=Path("tests/comparison_output"),
    )
    parser.add_argument(
        "--no-report",
        help="Skip HTML report generation",
        action="store_true",
    )

    args = parser.parse_args()

    # Determine which presets to run; explicit --preset flags win over --all.
    if args.presets:
        presets_to_run = args.presets
    elif args.all:
        presets_to_run = load_comparison_presets()
    else:
        print("Error: Either --preset or --all must be specified")
        print(f"Available presets: {', '.join(load_comparison_presets())}")
        sys.exit(1)

    print(f"Running comparison for {len(presets_to_run)} preset(s)")
    print(f"Output directory: {args.output_dir}")
    print()

    # Run comparisons, collecting one result dict per preset.
    results = []
    for preset_name in presets_to_run:
        try:
            result = run_comparison_for_preset(
                preset_name,
                sideline_only=args.sideline_only,
                upstream_file=args.upstream_file,
            )
            results.append(result)

            if result["status"] == "success":
                match_pct = result["stats"]["match_percentage"]
                print(f" ✓ Match: {match_pct:.1f}%")
            elif result["status"] == "missing_upstream":
                print(" ⚠ Missing upstream file")
            elif result["status"] == "error":
                print(f" ✗ Error: {result['error']}")
            else:
                print(" ✓ Captured sideline only")

        except Exception as e:
            # A single preset blowing up should not abort the whole run;
            # record it so it shows up in the summary and exit code.
            print(f" ✗ Failed: {e}")
            results.append(
                {
                    "preset": preset_name,
                    "status": "failed",
                    "error": str(e),
                }
            )

    # Generate HTML report and machine-readable summary.
    if not args.no_report and not args.sideline_only:
        successful_results = [r for r in results if r.get("status") == "success"]
        if successful_results:
            print("\nGenerating HTML report...")
            report_file = generate_html_report(successful_results, args.output_dir)
            print(f" Report saved to: {report_file}")

            # Also save summary JSON; ensure the directory exists in case
            # a custom --output-dir was given that nothing has created yet.
            args.output_dir.mkdir(parents=True, exist_ok=True)
            summary_file = args.output_dir / "comparison_summary.json"
            with open(summary_file, "w") as f:
                json.dump(
                    {
                        "timestamp": datetime.now().isoformat(),
                        "presets_tested": [r["preset"] for r in results],
                        "results": results,
                    },
                    f,
                    indent=2,
                )
            print(f" Summary saved to: {summary_file}")
        else:
            print("\nNote: No successful comparisons to report.")
            print(f" Capture files saved in {args.output_dir}")
            print(" Run comparison when upstream files are available.")

    # Print a per-status count summary.
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)

    status_counts = {}
    for result in results:
        status = result.get("status", "unknown")
        status_counts[status] = status_counts.get(status, 0) + 1

    for status, count in sorted(status_counts.items()):
        print(f" {status}: {count}")

    if "success" in status_counts:
        successful_results = [r for r in results if r.get("status") == "success"]
        avg_match = sum(
            r["stats"]["match_percentage"] for r in successful_results
        ) / len(successful_results)
        print(f"\n Average match rate: {avg_match:.1f}%")

    # Exit with an error code if any preset errored or failed outright.
    if any(r.get("status") in ("error", "failed") for r in results):
        sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point: run the CLI when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user