""" Data source abstraction - Treat data sources as first-class citizens in the pipeline. Each data source implements a common interface: - name: Display name for the source - fetch(): Fetch fresh data - stream(): Stream data continuously (optional) - get_items(): Get current items """ from abc import ABC, abstractmethod from collections.abc import Callable from dataclasses import dataclass from typing import Any @dataclass class SourceItem: """A single item from a data source.""" content: str source: str timestamp: str metadata: dict[str, Any] | None = None class DataSource(ABC): """Abstract base class for data sources. Static sources: Data fetched once and cached. Safe to call fetch() multiple times. Dynamic sources: Data changes over time. fetch() should be idempotent. """ @property @abstractmethod def name(self) -> str: """Display name for this source.""" ... @property def is_dynamic(self) -> bool: """Whether this source updates dynamically while the app runs. Default False.""" return False @abstractmethod def fetch(self) -> list[SourceItem]: """Fetch fresh data from the source. Must be idempotent.""" ... def get_items(self) -> list[SourceItem]: """Get current items. Default implementation returns cached fetch results.""" if not hasattr(self, "_items") or self._items is None: self._items = self.fetch() return self._items def refresh(self) -> list[SourceItem]: """Force refresh - clear cache and fetch fresh data.""" self._items = self.fetch() return self._items def stream(self): """Optional: Yield items continuously. Override for streaming sources.""" raise NotImplementedError def __post_init__(self): self._items: list[SourceItem] | None = None class HeadlinesDataSource(DataSource): """Data source for RSS feed headlines.""" @property def name(self) -> str: return "headlines" def fetch(self) -> list[SourceItem]: from engine.fetch import fetch_all items, _, _ = fetch_all() return [SourceItem(content=t, source=s, timestamp=ts) for t, s, ts in items] class PoetryDataSource(DataSource): """Data source for Poetry DB.""" @property def name(self) -> str: return "poetry" def fetch(self) -> list[SourceItem]: from engine.fetch import fetch_poetry items, _, _ = fetch_poetry() return [SourceItem(content=t, source=s, timestamp=ts) for t, s, ts in items] class PipelineDataSource(DataSource): """Data source for pipeline visualization (demo mode). Dynamic - updates every frame.""" def __init__(self, viewport_width: int = 80, viewport_height: int = 24): self.viewport_width = viewport_width self.viewport_height = viewport_height self.frame = 0 @property def name(self) -> str: return "pipeline" @property def is_dynamic(self) -> bool: return True def fetch(self) -> list[SourceItem]: from engine.pipeline_viz import generate_large_network_viewport buffer = generate_large_network_viewport( self.viewport_width, self.viewport_height, self.frame ) self.frame += 1 content = "\n".join(buffer) return [ SourceItem(content=content, source="pipeline", timestamp=f"f{self.frame}") ] def get_items(self) -> list[SourceItem]: return self.fetch() class MetricsDataSource(DataSource): """Data source that renders live pipeline metrics as ASCII art. Wraps a Pipeline and displays active stages with their average execution time and approximate FPS impact. Updates lazily when camera is about to focus on a new node (frame % 15 == 12). """ def __init__( self, pipeline: Any, viewport_width: int = 80, viewport_height: int = 24, ): self.pipeline = pipeline self.viewport_width = viewport_width self.viewport_height = viewport_height self.frame = 0 self._cached_metrics: dict | None = None @property def name(self) -> str: return "metrics" @property def is_dynamic(self) -> bool: return True def fetch(self) -> list[SourceItem]: if self.frame % 15 == 12: self._cached_metrics = None if self._cached_metrics is None: self._cached_metrics = self._fetch_metrics() buffer = self._render_metrics(self._cached_metrics) self.frame += 1 content = "\n".join(buffer) return [ SourceItem(content=content, source="metrics", timestamp=f"f{self.frame}") ] def _fetch_metrics(self) -> dict: if hasattr(self.pipeline, "get_metrics_summary"): metrics = self.pipeline.get_metrics_summary() if "error" not in metrics: return metrics return {"stages": {}, "pipeline": {"avg_ms": 0}} def _render_metrics(self, metrics: dict) -> list[str]: stages = metrics.get("stages", {}) if not stages: return self._render_empty() active_stages = { name: stats for name, stats in stages.items() if stats.get("avg_ms", 0) > 0 } if not active_stages: return self._render_empty() total_avg = sum(s["avg_ms"] for s in active_stages.values()) if total_avg == 0: total_avg = 1 lines: list[str] = [] lines.append("═" * self.viewport_width) lines.append(" PIPELINE METRICS ".center(self.viewport_width, "─")) lines.append("─" * self.viewport_width) header = f"{'STAGE':<20} {'AVG_MS':>8} {'FPS %':>8}" lines.append(header) lines.append("─" * self.viewport_width) for name, stats in sorted(active_stages.items()): avg_ms = stats.get("avg_ms", 0) fps_impact = (avg_ms / 16.67) * 100 if avg_ms > 0 else 0 row = f"{name:<20} {avg_ms:>7.2f} {fps_impact:>7.1f}%" lines.append(row[: self.viewport_width]) lines.append("─" * self.viewport_width) total_row = ( f"{'TOTAL':<20} {total_avg:>7.2f} {(total_avg / 16.67) * 100:>7.1f}%" ) lines.append(total_row[: self.viewport_width]) lines.append("─" * self.viewport_width) lines.append( f" Frame:{self.frame:04d} Cache:{'HIT' if self._cached_metrics else 'MISS'}" ) while len(lines) < self.viewport_height: lines.append(" " * self.viewport_width) return lines[: self.viewport_height] def _render_empty(self) -> list[str]: lines = [" " * self.viewport_width for _ in range(self.viewport_height)] msg = "No metrics available" y = self.viewport_height // 2 x = (self.viewport_width - len(msg)) // 2 lines[y] = " " * x + msg + " " * (self.viewport_width - x - len(msg)) return lines def get_items(self) -> list[SourceItem]: return self.fetch() class CachedDataSource(DataSource): """Data source that wraps another source with caching.""" def __init__(self, source: DataSource, max_items: int = 100): self.source = source self.max_items = max_items @property def name(self) -> str: return f"cached:{self.source.name}" def fetch(self) -> list[SourceItem]: items = self.source.fetch() return items[: self.max_items] def get_items(self) -> list[SourceItem]: if not hasattr(self, "_items") or self._items is None: self._items = self.fetch() return self._items class TransformDataSource(DataSource): """Data source that transforms items from another source. Applies optional filter and map functions to each item. This enables chaining: source → transform → transformed output. Args: source: The source to fetch items from filter_fn: Optional function(item: SourceItem) -> bool map_fn: Optional function(item: SourceItem) -> SourceItem """ def __init__( self, source: DataSource, filter_fn: Callable[[SourceItem], bool] | None = None, map_fn: Callable[[SourceItem], SourceItem] | None = None, ): self.source = source self.filter_fn = filter_fn self.map_fn = map_fn @property def name(self) -> str: return f"transform:{self.source.name}" def fetch(self) -> list[SourceItem]: items = self.source.fetch() if self.filter_fn: items = [item for item in items if self.filter_fn(item)] if self.map_fn: items = [self.map_fn(item) for item in items] return items class CompositeDataSource(DataSource): """Data source that combines multiple sources.""" def __init__(self, sources: list[DataSource]): self.sources = sources @property def name(self) -> str: return "composite" def fetch(self) -> list[SourceItem]: items = [] for source in self.sources: items.extend(source.fetch()) return items class SourceRegistry: """Registry for data sources.""" def __init__(self): self._sources: dict[str, DataSource] = {} self._default: str | None = None def register(self, source: DataSource, default: bool = False) -> None: self._sources[source.name] = source if default or self._default is None: self._default = source.name def get(self, name: str) -> DataSource | None: return self._sources.get(name) def list_all(self) -> dict[str, DataSource]: return dict(self._sources) def default(self) -> DataSource | None: if self._default: return self._sources.get(self._default) return None def create_headlines(self) -> HeadlinesDataSource: return HeadlinesDataSource() def create_poetry(self) -> PoetryDataSource: return PoetryDataSource() def create_pipeline(self, width: int = 80, height: int = 24) -> PipelineDataSource: return PipelineDataSource(width, height) _global_registry: SourceRegistry | None = None def get_source_registry() -> SourceRegistry: global _global_registry if _global_registry is None: _global_registry = SourceRegistry() return _global_registry def init_default_sources() -> SourceRegistry: """Initialize the default source registry with standard sources.""" registry = get_source_registry() registry.register(HeadlinesDataSource(), default=True) registry.register(PoetryDataSource()) return registry