""" Data source abstraction - Treat data sources as first-class citizens in the pipeline. Each data source implements a common interface: - name: Display name for the source - fetch(): Fetch fresh data - stream(): Stream data continuously (optional) - get_items(): Get current items """ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any @dataclass class SourceItem: """A single item from a data source.""" content: str source: str timestamp: str metadata: dict[str, Any] | None = None class DataSource(ABC): """Abstract base class for data sources. Static sources: Data fetched once and cached. Safe to call fetch() multiple times. Dynamic sources: Data changes over time. fetch() should be idempotent. """ @property @abstractmethod def name(self) -> str: """Display name for this source.""" ... @property def is_dynamic(self) -> bool: """Whether this source updates dynamically while the app runs. Default False.""" return False @abstractmethod def fetch(self) -> list[SourceItem]: """Fetch fresh data from the source. Must be idempotent.""" ... def get_items(self) -> list[SourceItem]: """Get current items. Default implementation returns cached fetch results.""" if not hasattr(self, "_items") or self._items is None: self._items = self.fetch() return self._items def refresh(self) -> list[SourceItem]: """Force refresh - clear cache and fetch fresh data.""" self._items = self.fetch() return self._items def stream(self): """Optional: Yield items continuously. Override for streaming sources.""" raise NotImplementedError def __post_init__(self): self._items: list[SourceItem] | None = None class HeadlinesDataSource(DataSource): """Data source for RSS feed headlines.""" @property def name(self) -> str: return "headlines" def fetch(self) -> list[SourceItem]: from engine.fetch import fetch_all items, _, _ = fetch_all() return [SourceItem(content=t, source=s, timestamp=ts) for t, s, ts in items] class PoetryDataSource(DataSource): """Data source for Poetry DB.""" @property def name(self) -> str: return "poetry" def fetch(self) -> list[SourceItem]: from engine.fetch import fetch_poetry items, _, _ = fetch_poetry() return [SourceItem(content=t, source=s, timestamp=ts) for t, s, ts in items] class PipelineDataSource(DataSource): """Data source for pipeline visualization (demo mode). Dynamic - updates every frame.""" def __init__(self, viewport_width: int = 80, viewport_height: int = 24): self.viewport_width = viewport_width self.viewport_height = viewport_height self.frame = 0 @property def name(self) -> str: return "pipeline" @property def is_dynamic(self) -> bool: return True def fetch(self) -> list[SourceItem]: from engine.pipeline_viz import generate_large_network_viewport buffer = generate_large_network_viewport( self.viewport_width, self.viewport_height, self.frame ) self.frame += 1 content = "\n".join(buffer) return [ SourceItem(content=content, source="pipeline", timestamp=f"f{self.frame}") ] def get_items(self) -> list[SourceItem]: return self.fetch() class CachedDataSource(DataSource): """Data source that wraps another source with caching.""" def __init__(self, source: DataSource, max_items: int = 100): self.source = source self.max_items = max_items @property def name(self) -> str: return f"cached:{self.source.name}" def fetch(self) -> list[SourceItem]: items = self.source.fetch() return items[: self.max_items] def get_items(self) -> list[SourceItem]: if not hasattr(self, "_items") or self._items is None: self._items = self.fetch() return self._items class CompositeDataSource(DataSource): """Data source that combines multiple sources.""" def __init__(self, sources: list[DataSource]): self.sources = sources @property def name(self) -> str: return "composite" def fetch(self) -> list[SourceItem]: items = [] for source in self.sources: items.extend(source.fetch()) return items class SourceRegistry: """Registry for data sources.""" def __init__(self): self._sources: dict[str, DataSource] = {} self._default: str | None = None def register(self, source: DataSource, default: bool = False) -> None: self._sources[source.name] = source if default or self._default is None: self._default = source.name def get(self, name: str) -> DataSource | None: return self._sources.get(name) def list_all(self) -> dict[str, DataSource]: return dict(self._sources) def default(self) -> DataSource | None: if self._default: return self._sources.get(self._default) return None def create_headlines(self) -> HeadlinesDataSource: return HeadlinesDataSource() def create_poetry(self) -> PoetryDataSource: return PoetryDataSource() def create_pipeline(self, width: int = 80, height: int = 24) -> PipelineDataSource: return PipelineDataSource(width, height) _global_registry: SourceRegistry | None = None def get_source_registry() -> SourceRegistry: global _global_registry if _global_registry is None: _global_registry = SourceRegistry() return _global_registry def init_default_sources() -> SourceRegistry: """Initialize the default source registry with standard sources.""" registry = get_source_registry() registry.register(HeadlinesDataSource(), default=True) registry.register(PoetryDataSource()) return registry