You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/scraper/utils.py

37 lines
795 B

# scraper/utils.py
from pathlib import Path
def load_replacements(path="text_replacements.txt") -> dict:
    """
    Load ``key=value`` replacement pairs from a simple text file.

    Blank lines, lines beginning with ``#`` and lines without ``=`` are
    ignored. Every other line is split on its first ``=``; both the key and
    the value are stripped of surrounding whitespace. When a key occurs more
    than once, the last occurrence wins. Lines whose key is empty (for
    example ``=value``) are skipped.

    Args:
        path: Location of the replacements file.

    Returns:
        A dict mapping each search string to its replacement. The dict is
        empty when the file does not exist.
    """
    source = Path(path)
    if not source.exists():
        return {}

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise be glued onto the first key or hide a leading "#".
    content = source.read_text(encoding="utf-8-sig")

    replacements = {}
    for raw_line in content.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        key, separator, value = entry.partition("=")
        key = key.strip()
        # An empty key is never a meaningful search string: str.replace("", v)
        # inserts v between every character of the target text.
        if not separator or not key:
            continue
        replacements[key] = value.strip()
    return replacements
def clean_text(raw: str, repl_dict: dict) -> str:
    """
    Clean text by applying user-defined replacements.

    Replacements are applied one after another in the iteration order of
    ``repl_dict``, so a later replacement sees the output of earlier ones.
    Entries with an empty key are skipped. The result has leading and
    trailing whitespace removed.

    Args:
        raw: The text to clean.
        repl_dict: Mapping of search string to replacement string.

    Returns:
        The text with all replacements applied, stripped of surrounding
        whitespace.
    """
    cleaned = raw
    for needle, substitute in repl_dict.items():
        # str.replace("", x) inserts x between every character, so an empty
        # search string would corrupt the whole text rather than clean it.
        if not needle:
            continue
        cleaned = cleaned.replace(needle, substitute)
    return cleaned.strip()