fix: apply ruff auto-fixes and add hk git hooks
- Fix pre-existing lint errors in engine/ modules using ruff --unsafe-fixes
- Add hk.pkl with pre-commit and pre-push hooks using ruff builtin
- Configure hooks to use 'uv run' prefix for tool execution
- Update mise.toml to include hk and pkl tools
- Use 'hk install --mise' for proper mise integration
- All 73 tests pass
This commit is contained in:
@@ -64,26 +64,31 @@ def _fetch_gutenberg(url, label):
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "mainline/0.1"})
|
||||
resp = urllib.request.urlopen(req, timeout=15)
|
||||
text = resp.read().decode('utf-8', errors='replace').replace('\r\n', '\n').replace('\r', '\n')
|
||||
text = (
|
||||
resp.read()
|
||||
.decode("utf-8", errors="replace")
|
||||
.replace("\r\n", "\n")
|
||||
.replace("\r", "\n")
|
||||
)
|
||||
# Strip PG boilerplate
|
||||
m = re.search(r'\*\*\*\s*START OF[^\n]*\n', text)
|
||||
m = re.search(r"\*\*\*\s*START OF[^\n]*\n", text)
|
||||
if m:
|
||||
text = text[m.end():]
|
||||
m = re.search(r'\*\*\*\s*END OF', text)
|
||||
text = text[m.end() :]
|
||||
m = re.search(r"\*\*\*\s*END OF", text)
|
||||
if m:
|
||||
text = text[:m.start()]
|
||||
text = text[: m.start()]
|
||||
# Split on blank lines into stanzas/passages
|
||||
blocks = re.split(r'\n{2,}', text.strip())
|
||||
blocks = re.split(r"\n{2,}", text.strip())
|
||||
items = []
|
||||
for blk in blocks:
|
||||
blk = ' '.join(blk.split()) # flatten to one line
|
||||
blk = " ".join(blk.split()) # flatten to one line
|
||||
if len(blk) < 20 or len(blk) > 280:
|
||||
continue
|
||||
if blk.isupper(): # skip all-caps headers
|
||||
if blk.isupper(): # skip all-caps headers
|
||||
continue
|
||||
if re.match(r'^[IVXLCDM]+\.?\s*$', blk): # roman numerals
|
||||
if re.match(r"^[IVXLCDM]+\.?\s*$", blk): # roman numerals
|
||||
continue
|
||||
items.append((blk, label, ''))
|
||||
items.append((blk, label, ""))
|
||||
return items
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user