feat/display #9
133
engine/fetch.py
Normal file
133
engine/fetch.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
RSS feed fetching, Project Gutenberg parsing, and headline caching.
|
||||
Depends on: config, sources, filter, terminal.
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import pathlib
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
|
||||
import feedparser
|
||||
|
||||
from engine import config
|
||||
from engine.sources import FEEDS, POETRY_SOURCES
|
||||
from engine.filter import strip_tags, skip
|
||||
from engine.terminal import boot_ln
|
||||
|
||||
# ─── SINGLE FEED ──────────────────────────────────────────
|
||||
def fetch_feed(url):
    """Download one feed and hand the raw bytes to feedparser.

    Args:
        url: Address of the feed to request.

    Returns:
        The object returned by ``feedparser.parse`` for the downloaded
        bytes, or ``None`` if building the request, downloading, or
        parsing raised any exception.
    """
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "mainline/0.1"})
        # The context manager closes the response (and its socket) even when
        # read() raises, so a failing feed cannot leak a connection.
        with urllib.request.urlopen(req, timeout=config.FEED_TIMEOUT) as resp:
            payload = resp.read()
        return feedparser.parse(payload)
    except Exception:
        # Deliberately broad: callers treat None as "feed unavailable".
        return None
|
||||
|
||||
|
||||
# ─── ALL RSS FEEDS ────────────────────────────────────────
|
||||
def fetch_all():
    """Poll every source in FEEDS and gather the headlines that pass filtering.

    One boot line is reported per source: "DARK" when the fetch returned
    None or the parser flagged the feed as bozo with no entries, "EMPTY"
    when no entry survived filtering, and "LINKED [n]" otherwise.

    Returns:
        ``(items, linked, failed)``. ``items`` is a list of
        ``(title, source, timestamp)`` tuples, where the timestamp is
        "HH:MM" or a dash placeholder when no usable date is present;
        ``linked`` and ``failed`` count the sources that did / did not
        contribute at least one item.
    """
    headlines = []
    ok_sources = 0
    bad_sources = 0

    for source, feed_url in FEEDS.items():
        parsed = fetch_feed(feed_url)
        if parsed is None or (parsed.bozo and not parsed.entries):
            boot_ln(source, "DARK", False)
            bad_sources += 1
            continue

        already = len(headlines)
        for entry in parsed.entries:
            title = strip_tags(entry.get("title", ""))
            if not title or skip(title):
                continue

            when = entry.get("published_parsed") or entry.get("updated_parsed")
            # Start from the placeholder; replace it only if the date formats.
            stamp = "——:——"
            if when:
                try:
                    stamp = datetime(*when[:6]).strftime("%H:%M")
                except Exception:
                    pass

            headlines.append((title, source, stamp))

        kept = len(headlines) - already
        if not kept:
            boot_ln(source, "EMPTY", False)
            bad_sources += 1
        else:
            boot_ln(source, f"LINKED [{kept}]", True)
            ok_sources += 1

    return headlines, ok_sources, bad_sources
|
||||
|
||||
|
||||
# ─── PROJECT GUTENBERG ────────────────────────────────────
|
||||
def _fetch_gutenberg(url, label):
    """Download a Project Gutenberg text and split it into short passages.

    Args:
        url: Location of the plain-text book.
        label: Source name stored alongside every passage.

    Returns:
        A list of ``(passage, label, '')`` tuples, one per kept block, or an
        empty list when the download raises.
    """
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "mainline/0.1"})
        # The context manager releases the connection even if read() raises.
        with urllib.request.urlopen(req, timeout=15) as resp:
            raw = resp.read()
    except Exception:
        # Best-effort: an unreachable source yields no items, not an error.
        return []

    text = raw.decode('utf-8', errors='replace').replace('\r\n', '\n').replace('\r', '\n')

    # Strip PG boilerplate: keep only what lies between the START/END markers.
    m = re.search(r'\*\*\*\s*START OF[^\n]*\n', text)
    if m:
        text = text[m.end():]
    m = re.search(r'\*\*\*\s*END OF', text)
    if m:
        text = text[:m.start()]

    # Split on blank lines into stanzas/passages.
    items = []
    for blk in re.split(r'\n{2,}', text.strip()):
        blk = ' '.join(blk.split())  # flatten to one line
        if len(blk) < 20 or len(blk) > 280:
            continue
        # Skips all-caps headers. Numeral-only headings such as "XIV." consist
        # solely of uppercase letters, so this test rejects them as well.
        if blk.isupper():
            continue
        items.append((blk, label, ''))
    return items
|
||||
|
||||
|
||||
def fetch_poetry():
    """Load passages from every entry in POETRY_SOURCES.

    Reports "LOADED [n]" for each source that produced passages and "DARK"
    for each one that produced none.

    Returns:
        ``(items, linked, failed)``: the combined passage tuples, the number
        of sources that produced passages, and the number that did not.
    """
    passages = []
    loaded = 0
    dark = 0

    for source, text_url in POETRY_SOURCES.items():
        found = _fetch_gutenberg(text_url, source)
        if not found:
            boot_ln(source, "DARK", False)
            dark += 1
            continue

        boot_ln(source, f"LOADED [{len(found)}]", True)
        passages += found
        loaded += 1

    return passages, loaded, dark
|
||||
|
||||
|
||||
# ─── CACHE ────────────────────────────────────────────────
|
||||
# Cache files are placed one directory above the one containing this module.
_CACHE_DIR = pathlib.Path(__file__).resolve().parent.parent


def _cache_path():
    """Return the cache file location for the current ``config.MODE``."""
    filename = f".mainline_cache_{config.MODE}.json"
    return _CACHE_DIR / filename
|
||||
|
||||
|
||||
def load_cache():
    """Read previously saved items back from the cache file.

    Returns:
        A non-empty list of tuples rebuilt from the file's "items" entry, or
        ``None`` when the file is missing, unreadable, malformed, or empty.
    """
    cache_file = _cache_path()
    if cache_file.exists():
        try:
            payload = json.loads(cache_file.read_text())
            # JSON stores tuples as lists; convert each entry back.
            restored = [tuple(entry) for entry in payload["items"]]
        except Exception:
            return None
        if restored:
            return restored
    return None
|
||||
|
||||
|
||||
def save_cache(items):
    """Write ``items`` to the cache file as JSON under the "items" key.

    Any failure (unwritable path, unserializable data) is ignored, so
    caching never interrupts the caller. Always returns ``None``.
    """
    payload = {"items": items}
    try:
        target = _cache_path()
        target.write_text(json.dumps(payload))
    except Exception:
        return
|
||||
Reference in New Issue
Block a user