You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
37 lines
795 B
37 lines
795 B
# scraper/utils.py
|
|
from pathlib import Path
|
|
|
|
|
|
def load_replacements(path="text_replacements.txt") -> dict:
    """
    Load key=value replacements from a simple text file.

    Each line is stripped of surrounding whitespace and split on its first
    "=", so the value part may itself contain "=" characters. Keys and
    values are stripped individually. If a key occurs more than once, the
    last occurrence wins.

    The following lines are ignored:
      * blank lines,
      * lines beginning with "#",
      * lines without an "=",
      * lines whose key is empty (e.g. "=value").

    Args:
        path: Location of the replacements file (string or path-like).

    Returns:
        A dict mapping each search string to its replacement. An empty dict
        is returned when the file does not exist.
    """
    fp = Path(path)
    if not fp.exists():
        return {}

    repl = {}
    # "utf-8-sig" reads plain UTF-8 too, but drops a leading byte-order mark
    # so it cannot end up glued to the first key of the file.
    for line in fp.read_text(encoding="utf-8-sig").splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        key, sep, value = line.partition("=")
        key = key.strip()
        # An empty key is never useful: str.replace("", v) would insert the
        # value between every character of the text being cleaned.
        if not sep or not key:
            continue
        repl[key] = value.strip()

    return repl
|
|
|
|
|
|
def clean_text(raw: str, repl_dict: dict) -> str:
    """
    Cleans text using user-defined replacements.

    Replacements are applied one after another in the dict's iteration
    order, so a later replacement also sees text produced by an earlier one.
    Leading and trailing whitespace is stripped from the final result.

    Args:
        raw: The text to clean.
        repl_dict: Mapping of search string -> replacement string.

    Returns:
        The cleaned, stripped text.
    """
    txt = raw

    for k, v in repl_dict.items():
        # Skip empty search strings: "abc".replace("", "X") yields
        # "XaXbXcX", which would corrupt the whole text.
        if not k:
            continue
        txt = txt.replace(k, v)

    return txt.strip()
|