# sideline/tests/test_performance_regression.py

"""Performance regression tests for pipeline stages with realistic data volumes.

These tests verify that the pipeline maintains performance with large datasets
by ensuring ViewportFilterStage prevents FontStage from rendering excessive items.

Uses pytest-benchmark for statistical benchmarking with automatic regression detection.
"""

import pytest

from engine.data_sources.sources import SourceItem
from engine.pipeline.adapters import FontStage, ViewportFilterStage
from engine.pipeline.core import PipelineContext
from engine.pipeline.params import PipelineParams


class TestViewportFilterPerformance:
    """Test ViewportFilterStage performance with realistic data volumes."""

    @pytest.mark.benchmark
    def test_filter_2000_items_to_viewport(self, benchmark):
        """Benchmark: Filter 2000 items to viewport size.

        Performance target: < 1ms per iteration, i.e. the filtering overhead
        should be negligible. The target is not asserted in this test; the
        timing is only collected through the ``benchmark`` fixture.

        Args:
            benchmark: pytest-benchmark fixture; it times the callable and
                hands back the callable's return value.
        """
        # Single source of truth for the viewport size used both to configure
        # the stage and to bound the result below.
        viewport_height = 24

        # Create 2000 test items (more than real headline sources)
        test_items = [
            SourceItem(f"Headline {i}", f"source-{i % 10}", str(i)) for i in range(2000)
        ]

        stage = ViewportFilterStage()
        ctx = PipelineContext()
        ctx.params = PipelineParams(viewport_height=viewport_height)

        result = benchmark(stage.process, test_items, ctx)

        # Loose sanity bounds: the filter keeps something, but never more
        # items than there are viewport rows.
        assert len(result) <= viewport_height
        assert len(result) > 0

    @pytest.mark.benchmark
    def test_font_stage_with_filtered_items(self, benchmark):
        """Benchmark: FontStage rendering a filtered-sized input (5 items).

        Performance target: < 50ms per iteration. The target is not asserted
        in this test; the timing is only collected through the ``benchmark``
        fixture. The point of the measurement is that a small, filtered input
        keeps FontStage work low.

        Args:
            benchmark: pytest-benchmark fixture; it times the callable and
                hands back the callable's return value.
        """
        # Hand-built stand-in for what ViewportFilterStage outputs
        filtered_items = [
            SourceItem(f"Headline {i}", "source", str(i))
            for i in range(5)  # Filtered count
        ]

        font_stage = FontStage()
        ctx = PipelineContext()
        ctx.params = PipelineParams()

        result = benchmark(font_stage.process, filtered_items, ctx)

        # Should render successfully into a non-empty list
        assert result is not None
        assert isinstance(result, list)
        assert len(result) > 0

    def test_filter_reduces_work_by_288x(self):
        """Verify ViewportFilterStage achieves the expected work reduction.

        With 1438 items and a 24-line viewport:
        - Without filter: FontStage renders all 1438 items
        - With filter: FontStage renders 4 items (height-based)
        - Expected improvement: 1438 / 4 ≈ 360x

        NOTE(review): the "288x" in the test name does not match the ~360x
        checked here; presumably it dates from a 5-item filter output
        (1438 / 5 ≈ 288) - confirm. The name is kept so existing test
        selections keep working.
        """
        test_items = [
            SourceItem(f"Headline {i}", "source", str(i)) for i in range(1438)
        ]

        stage = ViewportFilterStage()
        ctx = PipelineContext()
        ctx.params = PipelineParams(viewport_height=24)

        filtered = stage.process(test_items, ctx)

        # Check the count first: an empty result must fail with a readable
        # assertion rather than a ZeroDivisionError in the ratio below.
        # Expected count is 4 (24 viewport rows / 6 rows per item).
        assert len(filtered) == 4

        improvement_factor = len(test_items) / len(filtered)

        # Verify we get significant improvement (height-based filtering)
        assert 300 < improvement_factor < 500


class TestPipelinePerformanceWithRealData:
    """Integration tests for full pipeline performance with large datasets."""

    def test_pipeline_handles_large_item_count(self):
        """Run 2000 items through ViewportFilterStage and then FontStage.

        Checks that the filter shrinks the input and that FontStage returns a
        result for the filtered items. No timeout is enforced here, so a real
        hang would stall the test run rather than fail an assertion.
        """
        # Create large dataset
        large_items = [
            SourceItem(f"Headline {i}", f"source-{i % 5}", str(i)) for i in range(2000)
        ]

        filter_stage = ViewportFilterStage()
        font_stage = FontStage()

        ctx = PipelineContext()
        ctx.params = PipelineParams(viewport_height=24)

        # The filter must hand on strictly fewer items than it received
        filtered = filter_stage.process(large_items, ctx)
        assert len(filtered) < len(large_items)

        # FontStage only sees the filtered subset
        rendered = font_stage.process(filtered, ctx)
        assert rendered is not None

    def test_multiple_viewports_filter_correctly(self):
        """Test that the filter output stays bounded for several viewport heights.

        For each viewport height the filtered list must be non-empty and must
        not exceed the configured upper bound (plus one item of slack).
        """
        large_items = [
            SourceItem(f"Headline {i}", "source", str(i)) for i in range(1000)
        ]

        stage = ViewportFilterStage()

        # (viewport_height, expected_max_items). The second value is an upper
        # bound of one item per viewport row, not an exact expected count.
        test_cases = [
            (12, 12),
            (24, 24),
            (48, 48),
        ]

        for viewport_height, expected_max_items in test_cases:
            ctx = PipelineContext()
            ctx.params = PipelineParams(viewport_height=viewport_height)

            filtered = stage.process(large_items, ctx)

            # Messages name the viewport so a failure inside the loop points
            # at the offending configuration.
            assert len(filtered) <= expected_max_items + 1, (
                f"viewport_height={viewport_height}: got {len(filtered)} items, "
                f"expected at most {expected_max_items + 1}"
            )
            assert len(filtered) > 0, (
                f"viewport_height={viewport_height}: filter returned no items"
            )


class TestPerformanceRegressions:
    """Regression guards for performance problems around viewport filtering."""

    def test_filter_doesnt_render_all_items(self):
        """Regression test: the filter must not pass every item through.

        Getting all items back would mean ViewportFilterStage is broken or
        bypassed.
        """
        item_count = 1438
        headlines = [
            SourceItem(f"Headline {n}", "source", str(n)) for n in range(item_count)
        ]

        viewport_filter = ViewportFilterStage()
        context = PipelineContext()
        # Default params on purpose; the expected count below assumes they
        # describe a 24-row viewport - TODO confirm against PipelineParams.
        context.params = PipelineParams()

        kept = viewport_filter.process(headlines, context)
        kept_count = len(kept)

        # Regression detection: the output must differ in size from the input
        assert kept_count != len(headlines)
        # Height-based filtering: 4 items expected (24 rows at 6 rows per item)
        assert kept_count == 4

    def test_font_stage_doesnt_hang_with_filter(self):
        """Regression test: FontStage must cope with a filtered-sized input.

        Previously, FontStage would render all items, causing 10+ second
        hangs. It should now receive only ~5 items. No timing is asserted
        here; the test only checks that a list comes back.
        """
        # Stand-in for the handful of items ViewportFilterStage lets through
        post_filter_items = [
            SourceItem(f"Headline {n}", "source", str(n)) for n in range(5)
        ]

        renderer = FontStage()
        context = PipelineContext()
        context.params = PipelineParams()

        # Expected to return promptly rather than hang
        rendered = renderer.process(post_filter_items, context)

        # Confirm rendering produced a list
        assert rendered is not None
        assert isinstance(rendered, list)