feat: Add fast startup fetch and background caching

- Add fetch_all_fast() for quick startup using the first N feeds - Add background thread for full fetch and caching - Update main.py and pipeline_runner.py to use fast fetch - Update docs and skills
2026-03-19 22:38:55 -07:00
parent 4f2cf49a80
commit 7eaa441574
13 changed files with 393 additions and 220 deletions
--- a/engine/app/main.py
+++ b/engine/app/main.py
@@ -8,7 +8,7 @@ import time
 from engine import config
 from engine.display import BorderMode, DisplayRegistry
 from engine.effects import get_registry
-from engine.fetch import fetch_all, fetch_poetry, load_cache
+from engine.fetch import fetch_all, fetch_all_fast, fetch_poetry, load_cache, save_cache
 from engine.pipeline import (
    Pipeline,
    PipelineConfig,
@@ -208,7 +208,18 @@ def run_pipeline_mode_direct():
        if cached:
            source_items = cached
        else:
-            source_items, _, _ = fetch_all()
+            source_items = fetch_all_fast()
+            if source_items:
+                import threading
+
+                def background_fetch():
+                    full_items, _, _ = fetch_all()
+                    save_cache(full_items)
+
+                background_thread = threading.Thread(
+                    target=background_fetch, daemon=True
+                )
+                background_thread.start()
    elif source_name == "fixture":
        source_items = load_cache()
        if not source_items:
--- a/engine/app/pipeline_runner.py
+++ b/engine/app/pipeline_runner.py
@@ -8,7 +8,7 @@ from typing import Any

 from engine.display import BorderMode, DisplayRegistry
 from engine.effects import get_registry
-from engine.fetch import fetch_all, fetch_poetry, load_cache
+from engine.fetch import fetch_all, fetch_all_fast, fetch_poetry, load_cache, save_cache
 from engine.pipeline import Pipeline, PipelineConfig, PipelineContext, get_preset
 from engine.pipeline.adapters import (
    EffectPluginStage,
@@ -138,14 +138,7 @@ def run_pipeline_mode(preset_name: str = "demo"):
                print("Error: Invalid viewport format. Use WxH (e.g., 40x15)")
                sys.exit(1)

-    pipeline = Pipeline(
-        config=PipelineConfig(
-            source=preset.source,
-            display=preset.display,
-            camera=preset.camera,
-            effects=preset.effects,
-        )
-    )
+    pipeline = Pipeline(config=preset.to_config())

    print("  \033[38;5;245mFetching content...\033[0m")

@@ -167,10 +160,24 @@ def run_pipeline_mode(preset_name: str = "demo"):
        cached = load_cache()
        if cached:
            items = cached
+            print(f"  \033[38;5;82mLoaded {len(items)} items from cache\033[0m")
        elif preset.source == "poetry":
            items, _, _ = fetch_poetry()
        else:
-            items, _, _ = fetch_all()
+            items = fetch_all_fast()
+            if items:
+                print(
+                    f"  \033[38;5;82mFast start: {len(items)} items from first 5 sources\033[0m"
+                )
+
+            import threading
+
+            def background_fetch():
+                full_items, _, _ = fetch_all()
+                save_cache(full_items)
+
+            background_thread = threading.Thread(target=background_fetch, daemon=True)
+            background_thread.start()

        if not items:
            print("  \033[38;5;196mNo content available\033[0m")
--- a/engine/camera.py
+++ b/engine/camera.py
@@ -72,6 +72,17 @@ class Camera:
        """Shorthand for viewport_width."""
        return self.viewport_width

+    def set_speed(self, speed: float) -> None:
+        """Set the camera scroll speed dynamically.
+
+        This allows camera speed to be modulated during runtime
+        via PipelineParams or directly.
+
+        Args:
+            speed: New speed value (0.0 = stopped, >0 = movement); negatives clamp to 0.0
+        """
+        self.speed = max(0.0, speed)
+
    @property
    def h(self) -> int:
        """Shorthand for viewport_height."""
@@ -373,10 +384,11 @@ class Camera:
            truncated_line = vis_trunc(offset_line, viewport_width)

            # Pad line to full viewport width to prevent ghosting when panning
+            # Skip padding for empty lines to preserve intentional blank lines
            import re

            visible_len = len(re.sub(r"\x1b\[[0-9;]*m", "", truncated_line))
-            if visible_len < viewport_width:
+            if visible_len < viewport_width and visible_len > 0:
                truncated_line += " " * (viewport_width - visible_len)

            horizontal_slice.append(truncated_line)
--- a/engine/fetch.py
+++ b/engine/fetch.py
@@ -7,6 +7,7 @@ import json
 import pathlib
 import re
 import urllib.request
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 from typing import Any

@@ -17,54 +18,98 @@ from engine.filter import skip, strip_tags
 from engine.sources import FEEDS, POETRY_SOURCES
 from engine.terminal import boot_ln

-# Type alias for headline items
 HeadlineTuple = tuple[str, str, str]

+DEFAULT_MAX_WORKERS = 10
+FAST_START_SOURCES = 5
+FAST_START_TIMEOUT = 3

-# ─── SINGLE FEED ──────────────────────────────────────────
-def fetch_feed(url: str) -> Any | None:
-    """Fetch and parse a single RSS feed URL."""
+
+def fetch_feed(url: str) -> tuple[str, Any | None]:
+    """Fetch and parse one RSS feed URL; return (url, feed), where feed is None on any error."""
     try:
         req = urllib.request.Request(url, headers={"User-Agent": "mainline/0.1"})
-        resp = urllib.request.urlopen(req, timeout=config.FEED_TIMEOUT)
-        return feedparser.parse(resp.read())
+        timeout = FAST_START_TIMEOUT if url in _fast_start_urls else config.FEED_TIMEOUT
+        resp = urllib.request.urlopen(req, timeout=timeout)
+        return (url, feedparser.parse(resp.read()))
     except Exception:
-        return None
+        return (url, None)
+
+
+def _parse_feed(feed: Any, src: str) -> list[HeadlineTuple]:
+    """Return (title, src, "HH:MM") tuples for a feed's entries; [] when the feed is unusable."""
+    items: list[HeadlineTuple] = []
+    if feed is None or (feed.bozo and not feed.entries):
+        return items
+
+    for e in feed.entries:
+        t = strip_tags(e.get("title", ""))
+        if not t or skip(t):  # drop empty titles and any title skip() rejects
+            continue
+        pub = e.get("published_parsed") or e.get("updated_parsed")
+        try:
+            ts = datetime(*pub[:6]).strftime("%H:%M") if pub else "——:——"
+        except Exception:
+            ts = "——:——"  # same placeholder as a missing date when the tuple is unusable
+        items.append((t, src, ts))
+    return items
+
+
+def fetch_all_fast() -> list[HeadlineTuple]:
+    """Fetch only the first FAST_START_SOURCES feeds, concurrently, for fast startup."""
+    global _fast_start_urls  # read by fetch_feed() to pick FAST_START_TIMEOUT for these URLs
+    _fast_start_urls = set(list(FEEDS.values())[:FAST_START_SOURCES])
+
+    items: list[HeadlineTuple] = []
+    with ThreadPoolExecutor(max_workers=FAST_START_SOURCES) as executor:
+        futures = {
+            executor.submit(fetch_feed, url): src
+            for src, url in list(FEEDS.items())[:FAST_START_SOURCES]
+        }
+        for future in as_completed(futures):  # completion order, not FEEDS order
+            src = futures[future]
+            url, feed = future.result()
+            if feed is None or (feed.bozo and not feed.entries):
+                boot_ln(src, "DARK", False)
+                continue
+            parsed = _parse_feed(feed, src)
+            if parsed:
+                items.extend(parsed)
+                boot_ln(src, f"LINKED [{len(parsed)}]", True)
+            else:
+                boot_ln(src, "EMPTY", False)
+    return items


-# ─── ALL RSS FEEDS ────────────────────────────────────────
 def fetch_all() -> tuple[list[HeadlineTuple], int, int]:
-    """Fetch all RSS feeds and return items, linked count, failed count."""
+    """Fetch all RSS feeds concurrently and return items, linked count, failed count."""
+    global _fast_start_urls
+    _fast_start_urls = set()  # empty set: fetch_feed() uses config.FEED_TIMEOUT for every URL
+
     items: list[HeadlineTuple] = []
     linked = failed = 0
-    for src, url in FEEDS.items():
-        feed = fetch_feed(url)
-        if feed is None or (feed.bozo and not feed.entries):
-            boot_ln(src, "DARK", False)
-            failed += 1
-            continue
-        n = 0
-        for e in feed.entries:
-            t = strip_tags(e.get("title", ""))
-            if not t or skip(t):
+
+    with ThreadPoolExecutor(max_workers=DEFAULT_MAX_WORKERS) as executor:
+        futures = {executor.submit(fetch_feed, url): src for src, url in FEEDS.items()}
+        for future in as_completed(futures):  # completion order, not FEEDS order
+            src = futures[future]
+            url, feed = future.result()
+            if feed is None or (feed.bozo and not feed.entries):
+                boot_ln(src, "DARK", False)
+                failed += 1
                 continue
-            pub = e.get("published_parsed") or e.get("updated_parsed")
-            try:
-                ts = datetime(*pub[:6]).strftime("%H:%M") if pub else "——:——"
-            except Exception:
-                ts = "——:——"
-            items.append((t, src, ts))
-            n += 1
-        if n:
-            boot_ln(src, f"LINKED [{n}]", True)
-            linked += 1
-        else:
-            boot_ln(src, "EMPTY", False)
-            failed += 1
+            parsed = _parse_feed(feed, src)
+            if parsed:
+                items.extend(parsed)
+                boot_ln(src, f"LINKED [{len(parsed)}]", True)
+                linked += 1
+            else:
+                boot_ln(src, "EMPTY", False)
+                failed += 1
+
     return items, linked, failed


-# ─── PROJECT GUTENBERG ────────────────────────────────────
 def _fetch_gutenberg(url: str, label: str) -> list[HeadlineTuple]:
    """Download and parse stanzas/passages from a Project Gutenberg text."""
    try:
@@ -76,23 +121,21 @@ def _fetch_gutenberg(url: str, label: str) -> list[HeadlineTuple]:
            .replace("\r\n", "\n")
            .replace("\r", "\n")
        )
-        # Strip PG boilerplate
        m = re.search(r"\*\*\*\s*START OF[^\n]*\n", text)
        if m:
            text = text[m.end() :]
        m = re.search(r"\*\*\*\s*END OF", text)
        if m:
            text = text[: m.start()]
-        # Split on blank lines into stanzas/passages
        blocks = re.split(r"\n{2,}", text.strip())
        items = []
        for blk in blocks:
-            blk = " ".join(blk.split())  # flatten to one line
+            blk = " ".join(blk.split())
            if len(blk) < 20 or len(blk) > 280:
                continue
-            if blk.isupper():  # skip all-caps headers
+            if blk.isupper():
                continue
-            if re.match(r"^[IVXLCDM]+\.?\s*$", blk):  # roman numerals
+            if re.match(r"^[IVXLCDM]+\.?\s*$", blk):
                continue
            items.append((blk, label, ""))
        return items
@@ -100,29 +143,35 @@ def _fetch_gutenberg(url: str, label: str) -> list[HeadlineTuple]:
        return []


-def fetch_poetry():
-    """Fetch all poetry/literature sources."""
+def fetch_poetry() -> tuple[list[HeadlineTuple], int, int]:
+    """Fetch all poetry sources concurrently; return (items, linked count, failed count)."""
     items = []
     linked = failed = 0
-    for label, url in POETRY_SOURCES.items():
-        stanzas = _fetch_gutenberg(url, label)
-        if stanzas:
-            boot_ln(label, f"LOADED [{len(stanzas)}]", True)
-            items.extend(stanzas)
-            linked += 1
-        else:
-            boot_ln(label, "DARK", False)
-            failed += 1
+
+    with ThreadPoolExecutor(max_workers=DEFAULT_MAX_WORKERS) as executor:
+        futures = {
+            executor.submit(_fetch_gutenberg, url, label): label
+            for label, url in POETRY_SOURCES.items()
+        }
+        for future in as_completed(futures):  # completion order, not POETRY_SOURCES order
+            label = futures[future]
+            stanzas = future.result()
+            if stanzas:
+                boot_ln(label, f"LOADED [{len(stanzas)}]", True)
+                items.extend(stanzas)
+                linked += 1
+            else:
+                boot_ln(label, "DARK", False)
+                failed += 1
+
     return items, linked, failed


-# ─── CACHE ────────────────────────────────────────────────
-# Cache moved to engine/fixtures/headlines.json
-_CACHE_DIR = pathlib.Path(__file__).resolve().parent / "fixtures"
+_cache_dir = pathlib.Path(__file__).resolve().parent / "fixtures"


 def _cache_path():
-    return _CACHE_DIR / "headlines.json"
+    return _cache_dir / "headlines.json"


 def load_cache():
@@ -144,3 +193,6 @@ def save_cache(items):
        _cache_path().write_text(json.dumps({"items": items}))
    except Exception:
        pass
+
+
+_fast_start_urls: set = set()
--- a/engine/fixtures/headlines.json
+++ b/engine/fixtures/headlines.json
@@ -1,19 +1 @@
-{
-  "items": [
-    ["Breaking: AI systems achieve breakthrough in natural language understanding", "TechDaily", "14:32"],
-    ["Scientists discover new exoplanet in habitable zone", "ScienceNews", "13:15"],
-    ["Global markets rally as inflation shows signs of cooling", "FinanceWire", "12:48"],
-    ["New study reveals benefits of Mediterranean diet for cognitive health", "HealthJournal", "11:22"],
-    ["Tech giants announce collaboration on AI safety standards", "TechDaily", "10:55"],
-    ["Archaeologists uncover 3000-year-old city in desert", "HistoryNow", "09:30"],
-    ["Renewable energy capacity surpasses fossil fuels for first time", "GreenWorld", "08:15"],
-    ["Space agency prepares for next Mars mission launch window", "SpaceNews", "07:42"],
-    ["New film breaks box office records on opening weekend", "EntertainmentHub", "06:18"],
-    ["Local community raises funds for new library project", "CommunityPost", "05:30"],
-    ["Quantum computing breakthrough could revolutionize cryptography", "TechWeekly", "15:20"],
-    ["New species of deep-sea creature discovered in Pacific trench", "NatureToday", "14:05"],
-    ["Electric vehicle sales surpass traditional cars in Europe", "AutoNews", "12:33"],
-    ["Renowned artist unveils interactive AI-generated exhibition", "ArtsMonthly", "11:10"],
-    ["Climate summit reaches historic agreement on emissions", "WorldNews", "09:55"]
-  ]
-}
+{"items": []}
--- a/engine/pipeline/adapters/camera.py
+++ b/engine/pipeline/adapters/camera.py
@@ -62,6 +62,16 @@ class CameraClockStage(Stage):
        if data is None:
            return data

+        # Update camera speed from params if explicitly set (for dynamic modulation)
+        # Only update if camera_speed in params differs from the default (1.0)
+        # This preserves camera speed set during construction
+        if (
+            ctx.params
+            and hasattr(ctx.params, "camera_speed")
+            and ctx.params.camera_speed != 1.0
+        ):
+            self._camera.set_speed(ctx.params.camera_speed)
+
        current_time = time.perf_counter()
        dt = 0.0
        if self._last_frame_time is not None:
--- a/engine/pipeline/params.py
+++ b/engine/pipeline/params.py
@@ -32,7 +32,7 @@ class PipelineParams:

    # Camera config
    camera_mode: str = "vertical"
-    camera_speed: float = 1.0
+    camera_speed: float = 1.0  # Default speed
    camera_x: int = 0  # For horizontal scrolling

    # Effect config
--- a/engine/pipeline/presets.py
+++ b/engine/pipeline/presets.py
@@ -11,11 +11,14 @@ Loading order:
 """

 from dataclasses import dataclass, field
-from typing import Any
+from typing import TYPE_CHECKING, Any

 from engine.display import BorderMode
 from engine.pipeline.params import PipelineParams

+if TYPE_CHECKING:
+    from engine.pipeline.controller import PipelineConfig
+

 def _load_toml_presets() -> dict[str, Any]:
    """Load presets from TOML file."""
@@ -55,9 +58,10 @@ class PipelinePreset:
    viewport_width: int = 80  # Viewport width in columns
    viewport_height: int = 24  # Viewport height in rows
    source_items: list[dict[str, Any]] | None = None  # For ListDataSource
+    enable_metrics: bool = True  # Enable performance metrics collection

    def to_params(self) -> PipelineParams:
-        """Convert to PipelineParams."""
+        """Convert to PipelineParams (runtime configuration)."""
        from engine.display import BorderMode

        params = PipelineParams()
@@ -72,10 +76,27 @@ class PipelinePreset:
        )
        params.camera_mode = self.camera
        params.effect_order = self.effects.copy()
-        # Note: camera_speed, viewport_width/height are not stored in PipelineParams
-        # They are used directly from the preset object in pipeline_runner.py
+        params.camera_speed = self.camera_speed
+        # Note: viewport_width/height are read from PipelinePreset directly
+        # in pipeline_runner.py, not from PipelineParams
        return params

+    def to_config(self) -> "PipelineConfig":
+        """Convert to PipelineConfig (static pipeline construction config).
+
+        PipelineConfig is used once at pipeline initialization and contains
+        the core settings that don't change during execution.
+        """
+        from engine.pipeline.controller import PipelineConfig  # module-level import is TYPE_CHECKING-only
+
+        return PipelineConfig(
+            source=self.source,
+            display=self.display,
+            camera=self.camera,
+            effects=self.effects.copy(),  # copy so the config does not share the preset's list
+            enable_metrics=self.enable_metrics,
+        )
+
    @classmethod
    def from_yaml(cls, name: str, data: dict[str, Any]) -> "PipelinePreset":
        """Create a PipelinePreset from YAML data."""
@@ -91,6 +112,7 @@ class PipelinePreset:
            viewport_width=data.get("viewport_width", 80),
            viewport_height=data.get("viewport_height", 24),
            source_items=data.get("source_items"),
+            enable_metrics=data.get("enable_metrics", True),
        )