forked from genewildish/Mainline
- Add TransformDataSource for filtering/mapping source items - Add MetricsDataSource for rendering live pipeline metrics as ASCII art - Fix display stage registration in StageRegistry - Register sources with both class name and simple name aliases - Fix DisplayStage.init() to pass reuse parameter - Simplify create_default_pipeline to use DataSourceStage wrapper - Set pygame as default display - Remove old pipeline tasks from mise.toml - Add tests for new pipeline architecture
363 lines
10 KiB
Python
363 lines
10 KiB
Python
"""
Data source abstraction - Treat data sources as first-class citizens in the pipeline.

Each data source implements a common interface:
- name: Display name for the source
- fetch(): Fetch fresh data
- stream(): Stream data continuously (optional)
- get_items(): Get current items
"""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from collections.abc import Callable
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
|
|
class SourceItem:
|
|
"""A single item from a data source."""
|
|
|
|
content: str
|
|
source: str
|
|
timestamp: str
|
|
metadata: dict[str, Any] | None = None
|
|
|
|
|
|
class DataSource(ABC):
    """Abstract base class for data sources.

    Static sources: data is fetched once and cached by ``get_items()``.
    Safe to call ``fetch()`` multiple times.
    Dynamic sources: data changes over time. ``fetch()`` should be idempotent.

    Subclasses must implement ``name`` and ``fetch()``; every other member
    has a default implementation.
    """

    # Cache slot used by get_items()/refresh(). It is a class-level default
    # because this class is not a dataclass (so __post_init__ is never called
    # automatically) and subclasses define __init__ without calling
    # super().__init__(); instances must still start with an empty cache.
    _items: list[SourceItem] | None = None

    @property
    @abstractmethod
    def name(self) -> str:
        """Display name for this source."""
        ...

    @property
    def is_dynamic(self) -> bool:
        """Whether this source updates dynamically while the app runs. Default False."""
        return False

    @abstractmethod
    def fetch(self) -> list[SourceItem]:
        """Fetch fresh data from the source. Must be idempotent."""
        ...

    def get_items(self) -> list[SourceItem]:
        """Return the current items, fetching and caching them on first use.

        An empty list returned by ``fetch()`` is cached like any other result;
        only ``None`` means "not fetched yet".
        """
        if self._items is None:
            self._items = self.fetch()
        return self._items

    def refresh(self) -> list[SourceItem]:
        """Force a refresh: replace the cache with a fresh ``fetch()`` result."""
        self._items = self.fetch()
        return self._items

    def stream(self):
        """Optional: yield items continuously. Override for streaming sources.

        Raises:
            NotImplementedError: always, in this default implementation.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not support streaming"
        )

    def __post_init__(self):
        """Reset the item cache.

        Kept for backward compatibility; nothing in this class invokes it
        automatically, so the class-level ``_items`` default is what
        guarantees the initial empty cache.
        """
        self._items = None
|
|
|
|
|
|
class HeadlinesDataSource(DataSource):
    """Source that exposes RSS feed headlines as ``SourceItem`` objects."""

    @property
    def name(self) -> str:
        """Display name of this source."""
        return "headlines"

    def fetch(self) -> list[SourceItem]:
        """Fetch headlines and wrap each one in a ``SourceItem``.

        Delegates to ``engine.fetch.fetch_all``, which must return a 3-tuple;
        only its first element is used, and each entry of it is unpacked as
        ``(content, source, timestamp)``.
        """
        # Imported at call time rather than at module load.
        from engine.fetch import fetch_all

        entries, _second, _third = fetch_all()
        wrapped: list[SourceItem] = []
        for text, origin, stamp in entries:
            wrapped.append(SourceItem(content=text, source=origin, timestamp=stamp))
        return wrapped
|
|
|
|
|
|
class PoetryDataSource(DataSource):
    """Source that exposes Poetry DB entries as ``SourceItem`` objects."""

    @property
    def name(self) -> str:
        """Display name of this source."""
        return "poetry"

    def fetch(self) -> list[SourceItem]:
        """Fetch poetry entries and wrap each one in a ``SourceItem``.

        Delegates to ``engine.fetch.fetch_poetry``, which must return a
        3-tuple; only its first element is used, and each entry of it is
        unpacked as ``(content, source, timestamp)``.
        """
        # Imported at call time rather than at module load.
        from engine.fetch import fetch_poetry

        entries, _second, _third = fetch_poetry()
        wrapped: list[SourceItem] = []
        for text, origin, stamp in entries:
            wrapped.append(SourceItem(content=text, source=origin, timestamp=stamp))
        return wrapped
|
|
|
|
|
|
class PipelineDataSource(DataSource):
    """Pipeline visualization source (demo mode).

    Dynamic: every ``fetch()`` renders a new frame and advances the frame
    counter, and ``get_items()`` never caches.
    """

    def __init__(self, viewport_width: int = 80, viewport_height: int = 24):
        """Store the viewport size and start the frame counter at zero."""
        self.frame = 0
        self.viewport_width = viewport_width
        self.viewport_height = viewport_height

    @property
    def name(self) -> str:
        """Display name of this source."""
        return "pipeline"

    @property
    def is_dynamic(self) -> bool:
        """Always True: the content changes on every fetch."""
        return True

    def fetch(self) -> list[SourceItem]:
        """Render the current frame and return it as a single item.

        The rows produced by ``generate_large_network_viewport`` are joined
        with newlines into one ``SourceItem``.
        """
        # Imported at call time rather than at module load.
        from engine.pipeline_viz import generate_large_network_viewport

        rendered_rows = generate_large_network_viewport(
            self.viewport_width, self.viewport_height, self.frame
        )
        # The counter advances before the item is built, so the timestamp
        # carries the post-increment frame number.
        self.frame += 1
        frame_item = SourceItem(
            content="\n".join(rendered_rows),
            source="pipeline",
            timestamp=f"f{self.frame}",
        )
        return [frame_item]

    def get_items(self) -> list[SourceItem]:
        """Bypass the base-class cache: always produce a fresh frame."""
        return self.fetch()
|
|
|
|
|
|
class MetricsDataSource(DataSource):
    """Data source that renders live pipeline metrics as ASCII art.

    Wraps a pipeline object and displays active stages (those with a positive
    ``avg_ms``) with their average execution time and approximate FPS impact.
    Metrics are cached between frames and re-read whenever
    ``frame % 15 == 12`` (per the original note, timed for when the camera is
    about to focus on a new node).
    """

    # Divisor used for the "FPS %" column.
    # NOTE(review): presumably the per-frame budget at 60 FPS (1000 / 60 ≈ 16.67) — confirm.
    FRAME_BUDGET_MS = 16.67

    def __init__(
        self,
        pipeline: Any,
        viewport_width: int = 80,
        viewport_height: int = 24,
    ):
        """Store the pipeline and viewport size; start with an empty cache.

        Args:
            pipeline: Object queried for metrics via ``get_metrics_summary()``
                when it has that attribute.
            viewport_width: Width, in characters, of every rendered line.
            viewport_height: Number of lines in the rendered buffer.
        """
        self.pipeline = pipeline
        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
        self.frame = 0
        self._cached_metrics: dict | None = None
        # True when the most recent fetch() reused cached metrics instead of
        # querying the pipeline; shown in the rendered footer.
        self._cache_hit = False

    @property
    def name(self) -> str:
        """Display name of this source."""
        return "metrics"

    @property
    def is_dynamic(self) -> bool:
        """Always True: the content changes while the app runs."""
        return True

    def fetch(self) -> list[SourceItem]:
        """Render the current metrics frame and return it as a single item."""
        # Periodic invalidation so the metrics are re-read from the pipeline.
        if self.frame % 15 == 12:
            self._cached_metrics = None

        # Record hit/miss *before* refilling the cache; checking the cache
        # after the refill would always report a hit.
        self._cache_hit = self._cached_metrics is not None
        if not self._cache_hit:
            self._cached_metrics = self._fetch_metrics()

        buffer = self._render_metrics(self._cached_metrics)
        # The counter advances before the item is built, so the timestamp
        # carries the post-increment frame number.
        self.frame += 1
        content = "\n".join(buffer)
        return [
            SourceItem(content=content, source="metrics", timestamp=f"f{self.frame}")
        ]

    def _fetch_metrics(self) -> dict:
        """Read the metrics summary from the pipeline.

        Returns an empty placeholder summary when the pipeline has no
        ``get_metrics_summary`` attribute or its result contains an
        ``"error"`` key.
        """
        if hasattr(self.pipeline, "get_metrics_summary"):
            metrics = self.pipeline.get_metrics_summary()
            if "error" not in metrics:
                return metrics
        return {"stages": {}, "pipeline": {"avg_ms": 0}}

    def _clip(self, text: str) -> str:
        """Truncate ``text`` so it never exceeds the viewport width."""
        return text[: max(self.viewport_width, 0)]

    def _render_metrics(self, metrics: dict) -> list[str]:
        """Render ``metrics`` as a table of exactly ``viewport_height`` lines.

        Falls back to the "No metrics available" screen when there are no
        stages or none of them has a positive ``avg_ms``.
        """
        stages = metrics.get("stages", {})
        if not stages:
            return self._render_empty()

        active_stages = {
            name: stats for name, stats in stages.items() if stats.get("avg_ms", 0) > 0
        }
        if not active_stages:
            return self._render_empty()

        # Every active stage has avg_ms > 0, so the total is strictly positive.
        total_avg = sum(s["avg_ms"] for s in active_stages.values())

        width = self.viewport_width
        budget = self.FRAME_BUDGET_MS
        rule = "─" * width

        lines: list[str] = [
            "═" * width,
            # center() returns the text unchanged when it is wider than the
            # viewport, so it is clipped like every other line.
            self._clip(" PIPELINE METRICS ".center(width, "─")),
            rule,
            self._clip(f"{'STAGE':<20} {'AVG_MS':>8} {'FPS %':>8}"),
            rule,
        ]

        for name, stats in sorted(active_stages.items()):
            avg_ms = stats["avg_ms"]
            fps_impact = (avg_ms / budget) * 100
            lines.append(self._clip(f"{name:<20} {avg_ms:>7.2f} {fps_impact:>7.1f}%"))

        lines.append(rule)
        lines.append(
            self._clip(
                f"{'TOTAL':<20} {total_avg:>7.2f} {(total_avg / budget) * 100:>7.1f}%"
            )
        )
        lines.append(rule)
        lines.append(
            self._clip(
                f" Frame:{self.frame:04d} Cache:{'HIT' if self._cache_hit else 'MISS'}"
            )
        )

        # Pad with blank lines, then cut to exactly the viewport height.
        while len(lines) < self.viewport_height:
            lines.append(" " * width)

        return lines[: self.viewport_height]

    def _render_empty(self) -> list[str]:
        """Render a blank viewport with a centred "No metrics available" line.

        Returns an empty list when the viewport has no rows, and clips the
        message when the viewport is narrower than it.
        """
        width = self.viewport_width
        lines = [" " * width for _ in range(self.viewport_height)]
        if not lines:
            # Zero-height (or negative) viewport: there is no row to write to.
            return lines

        msg = "No metrics available"
        y = self.viewport_height // 2
        x = (width - len(msg)) // 2
        # A negative x (viewport narrower than msg) yields no left padding;
        # the clip keeps the line within the viewport width.
        lines[y] = self._clip(" " * x + msg + " " * (width - x - len(msg)))
        return lines

    def get_items(self) -> list[SourceItem]:
        """Bypass the base-class cache: always render a fresh frame."""
        return self.fetch()
|
|
|
|
|
|
class CachedDataSource(DataSource):
    """Wrapper that serves a capped, cached view of another source."""

    def __init__(self, source: DataSource, max_items: int = 100):
        """Remember the wrapped source and the slice bound applied to its items."""
        self.max_items = max_items
        self.source = source

    @property
    def name(self) -> str:
        """Name of the wrapped source, prefixed with ``cached:``."""
        return f"cached:{self.source.name}"

    def fetch(self) -> list[SourceItem]:
        """Fetch from the wrapped source and keep the ``[:max_items]`` slice."""
        return self.source.fetch()[: self.max_items]

    def get_items(self) -> list[SourceItem]:
        """Return the cached items, fetching them on first use."""
        cached = getattr(self, "_items", None)
        if cached is None:
            cached = self._items = self.fetch()
        return cached
|
|
|
|
|
|
class TransformDataSource(DataSource):
    """Data source that post-processes the items of another source.

    An optional predicate drops items and an optional mapping function
    rewrites the survivors, enabling chains of the form
    source → transform → transformed output.

    Args:
        source: The source to fetch items from
        filter_fn: Optional function(item: SourceItem) -> bool
        map_fn: Optional function(item: SourceItem) -> SourceItem
    """

    def __init__(
        self,
        source: DataSource,
        filter_fn: Callable[[SourceItem], bool] | None = None,
        map_fn: Callable[[SourceItem], SourceItem] | None = None,
    ):
        self.map_fn = map_fn
        self.filter_fn = filter_fn
        self.source = source

    @property
    def name(self) -> str:
        """Name of the wrapped source, prefixed with ``transform:``."""
        return f"transform:{self.source.name}"

    def fetch(self) -> list[SourceItem]:
        """Fetch from the wrapped source, then filter, then map.

        Filtering finishes over all items before any mapping starts. When
        neither function is set, the wrapped source's result is returned
        as-is.
        """
        result = self.source.fetch()

        predicate = self.filter_fn
        if predicate:
            result = list(filter(predicate, result))

        mapper = self.map_fn
        if mapper:
            result = list(map(mapper, result))

        return result
|
|
|
|
|
|
class CompositeDataSource(DataSource):
    """Data source that concatenates the items of several sources."""

    def __init__(self, sources: list[DataSource]):
        """Keep a reference to the list of member sources (not a copy)."""
        self.sources = sources

    @property
    def name(self) -> str:
        """Display name of this source."""
        return "composite"

    def fetch(self) -> list[SourceItem]:
        """Fetch every member source in order and return all items in one list."""
        return [item for member in self.sources for item in member.fetch()]
|
|
|
|
|
|
class SourceRegistry:
    """Registry mapping source names to data source instances.

    Tracks one default source: the first source registered, unless a later
    registration passes ``default=True``.
    """

    def __init__(self):
        """Create an empty registry with no default source."""
        self._sources: dict[str, DataSource] = {}
        self._default: str | None = None

    def register(self, source: DataSource, default: bool = False) -> None:
        """Register ``source`` under its ``name``.

        A source already registered under the same name is replaced. The
        source becomes the default when ``default`` is true or when no
        default has been set yet.
        """
        self._sources[source.name] = source
        if default or self._default is None:
            self._default = source.name

    def get(self, name: str) -> DataSource | None:
        """Return the source registered under ``name``, or None."""
        return self._sources.get(name)

    def list_all(self) -> dict[str, DataSource]:
        """Return a shallow copy of the name → source mapping."""
        return dict(self._sources)

    def default(self) -> DataSource | None:
        """Return the default source, or None when nothing is registered."""
        # Compare against None rather than testing truthiness, so a default
        # source whose name is the empty string is still returned.
        if self._default is None:
            return None
        return self._sources.get(self._default)

    def create_headlines(self) -> HeadlinesDataSource:
        """Build a new headlines source (it is not registered)."""
        return HeadlinesDataSource()

    def create_poetry(self) -> PoetryDataSource:
        """Build a new poetry source (it is not registered)."""
        return PoetryDataSource()

    def create_pipeline(self, width: int = 80, height: int = 24) -> PipelineDataSource:
        """Build a new pipeline source of the given size (it is not registered)."""
        return PipelineDataSource(width, height)
|
|
|
|
|
|
# Module-level registry singleton; stays None until get_source_registry() creates it.
_global_registry: SourceRegistry | None = None
|
|
|
|
|
|
def get_source_registry() -> SourceRegistry:
    """Return the shared module-level registry, creating it on first call."""
    global _global_registry
    registry = _global_registry
    if registry is None:
        # First use: build the registry and remember it for later callers.
        registry = _global_registry = SourceRegistry()
    return registry
|
|
|
|
|
|
def init_default_sources() -> SourceRegistry:
    """Register the standard sources on the shared registry and return it.

    A new headlines source is registered as the default, followed by a new
    poetry source. Calling this again replaces the instances stored under
    those names.
    """
    registry = get_source_registry()

    headlines = HeadlinesDataSource()
    registry.register(headlines, default=True)

    poetry = PoetryDataSource()
    registry.register(poetry)

    return registry
|