forked from genewildish/Mainline
refactor: Create engine package, extracting data sources to sources.py, and add refactoring documentation.
This commit is contained in:
115
engine/sources.py
Normal file
115
engine/sources.py
Normal file
@@ -0,0 +1,115 @@
"""
Data sources: feed URLs, poetry sources, language mappings, script fonts.
Pure data — no logic, no dependencies.
"""

# ─── RSS FEEDS ────────────────────────────────────────────
# Maps an outlet's display name to the URL of its syndication feed.
# Entries are grouped by topic with a comment heading per group; the
# grouping is purely visual — the dict itself is flat.
# NOTE(review): the three BBC entries use plain http:// while every other
# feed uses https:// — consider switching if the endpoint supports it.
FEEDS = {
    # Science & Technology
    "Nature": "https://www.nature.com/nature.rss",
    "Science Daily": "https://www.sciencedaily.com/rss/all.xml",
    "Phys.org": "https://phys.org/rss-feed/",
    "NASA": "https://www.nasa.gov/news-release/feed/",
    "Ars Technica": "https://feeds.arstechnica.com/arstechnica/index",
    "New Scientist": "https://www.newscientist.com/section/news/feed/",
    "Quanta": "https://api.quantamagazine.org/feed/",
    "BBC Science": "http://feeds.bbci.co.uk/news/science_and_environment/rss.xml",
    "MIT Tech Review": "https://www.technologyreview.com/feed/",
    # Economics & Business
    "BBC Business": "http://feeds.bbci.co.uk/news/business/rss.xml",
    "MarketWatch": "https://feeds.marketwatch.com/marketwatch/topstories/",
    "Economist": "https://www.economist.com/finance-and-economics/rss.xml",
    # World & Politics
    "BBC World": "http://feeds.bbci.co.uk/news/world/rss.xml",
    "NPR": "https://feeds.npr.org/1001/rss.xml",
    "Al Jazeera": "https://www.aljazeera.com/xml/rss/all.xml",
    "Guardian World": "https://www.theguardian.com/world/rss",
    "DW": "https://rss.dw.com/rdf/rss-en-all",
    "France24": "https://www.france24.com/en/rss",
    "ABC Australia": "https://www.abc.net.au/news/feed/2942460/rss.xml",
    "Japan Times": "https://www.japantimes.co.jp/feed/",
    "The Hindu": "https://www.thehindu.com/news/national/feeder/default.rss",
    "SCMP": "https://www.scmp.com/rss/91/feed",
    "Der Spiegel": "https://www.spiegel.de/international/index.rss",
    # Culture & Ideas
    "Guardian Culture": "https://www.theguardian.com/culture/rss",
    "Aeon": "https://aeon.co/feed.rss",
    "Smithsonian": "https://www.smithsonianmag.com/rss/latest_articles/",
    "The Marginalian": "https://www.themarginalian.org/feed/",
    "Nautilus": "https://nautil.us/feed/",
    "Wired": "https://www.wired.com/feed/rss",
    "The Conversation": "https://theconversation.com/us/articles.atom",
    "Longreads": "https://longreads.com/feed/",
    "Literary Hub": "https://lithub.com/feed/",
    "Atlas Obscura": "https://www.atlasobscura.com/feeds/latest",
}

# ─── POETRY / LITERATURE ─────────────────────────────────
# Public domain via Project Gutenberg
# Maps a short label (poet name, with a "II" suffix for a second volume by
# the same poet) to the URL of a plain-text (.txt) Gutenberg e-book.
POETRY_SOURCES = {
    "Whitman": "https://www.gutenberg.org/cache/epub/1322/pg1322.txt",
    "Dickinson": "https://www.gutenberg.org/cache/epub/12242/pg12242.txt",
    "Whitman II": "https://www.gutenberg.org/cache/epub/8388/pg8388.txt",
    "Rilke": "https://www.gutenberg.org/cache/epub/38594/pg38594.txt",
    "Pound": "https://www.gutenberg.org/cache/epub/41162/pg41162.txt",
    "Pound II": "https://www.gutenberg.org/cache/epub/51992/pg51992.txt",
    "Eliot": "https://www.gutenberg.org/cache/epub/1567/pg1567.txt",
    "Yeats": "https://www.gutenberg.org/cache/epub/38877/pg38877.txt",
    "Masters": "https://www.gutenberg.org/cache/epub/1280/pg1280.txt",
    "Baudelaire": "https://www.gutenberg.org/cache/epub/36098/pg36098.txt",
    "Crane": "https://www.gutenberg.org/cache/epub/40786/pg40786.txt",
    "Poe": "https://www.gutenberg.org/cache/epub/10031/pg10031.txt",
}

# ─── SOURCE → LANGUAGE MAPPING ───────────────────────────
# Headlines from these outlets render in their cultural home language
# Keys are outlet display names (the same spelling used as feed names);
# values are language codes. Outlets not listed here have no entry.
SOURCE_LANGS = {
    "Der Spiegel": "de",
    "DW": "de",
    "France24": "fr",
    "Japan Times": "ja",
    "The Hindu": "hi",
    "SCMP": "zh-cn",
    "Al Jazeera": "ar",
}

# ─── LOCATION → LANGUAGE ─────────────────────────────────
# Maps a regular-expression source string to a language code. Each pattern
# is a non-capturing alternation of place, nationality and leader names
# wrapped in \b word boundaries, so only whole words/phrases match.
# All alternatives are written in lowercase.
# NOTE(review): presumably matched against lowercased text or with
# re.IGNORECASE — confirm in the caller.
# Portugal and Brazil are separate patterns that both map to 'pt'.
LOCATION_LANGS = {
    r'\b(?:china|chinese|beijing|shanghai|hong kong|xi jinping)\b': 'zh-cn',
    r'\b(?:japan|japanese|tokyo|osaka|kishida)\b': 'ja',
    r'\b(?:korea|korean|seoul|pyongyang)\b': 'ko',
    r'\b(?:russia|russian|moscow|kremlin|putin)\b': 'ru',
    r'\b(?:saudi|dubai|qatar|egypt|cairo|arabic)\b': 'ar',
    r'\b(?:india|indian|delhi|mumbai|modi)\b': 'hi',
    r'\b(?:germany|german|berlin|munich|scholz)\b': 'de',
    r'\b(?:france|french|paris|lyon|macron)\b': 'fr',
    r'\b(?:spain|spanish|madrid)\b': 'es',
    r'\b(?:italy|italian|rome|milan|meloni)\b': 'it',
    r'\b(?:portugal|portuguese|lisbon)\b': 'pt',
    r'\b(?:brazil|brazilian|são paulo|lula)\b': 'pt',
    r'\b(?:greece|greek|athens)\b': 'el',
    r'\b(?:turkey|turkish|istanbul|ankara|erdogan)\b': 'tr',
    r'\b(?:iran|iranian|tehran)\b': 'fa',
    r'\b(?:thailand|thai|bangkok)\b': 'th',
    r'\b(?:vietnam|vietnamese|hanoi)\b': 'vi',
    r'\b(?:ukraine|ukrainian|kyiv|kiev|zelensky)\b': 'uk',
    r'\b(?:israel|israeli|jerusalem|tel aviv|netanyahu)\b': 'he',
}

# ─── NON-LATIN SCRIPT FONTS (macOS) ──────────────────────
# Maps a language code to an absolute path of a font file under
# /System/Library/Fonts. Several codes share one file (Arial for
# ru/uk/el/he, GeezaPro for ar/fa). These are macOS system paths, so
# they will not exist on other platforms.
SCRIPT_FONTS = {
    'zh-cn': '/System/Library/Fonts/STHeiti Medium.ttc',
    'ja': '/System/Library/Fonts/ヒラギノ角ゴシック W9.ttc',
    'ko': '/System/Library/Fonts/AppleSDGothicNeo.ttc',
    'ru': '/System/Library/Fonts/Supplemental/Arial.ttf',
    'uk': '/System/Library/Fonts/Supplemental/Arial.ttf',
    'el': '/System/Library/Fonts/Supplemental/Arial.ttf',
    'he': '/System/Library/Fonts/Supplemental/Arial.ttf',
    'ar': '/System/Library/Fonts/GeezaPro.ttc',
    'fa': '/System/Library/Fonts/GeezaPro.ttc',
    'hi': '/System/Library/Fonts/Kohinoor.ttc',
    'th': '/System/Library/Fonts/ThonburiUI.ttc',
}

# Scripts that have no uppercase
# Set of language codes; membership means the language's script has no
# upper/lower case distinction.
NO_UPPER = {'zh-cn', 'ja', 'ko', 'ar', 'fa', 'hi', 'th', 'he'}
Reference in New Issue
Block a user