import os
import re
from pathlib import Path


# ------------------------------------------------------------
# Load replacements from text_replacements.txt (optional file)
# ------------------------------------------------------------
def load_replacements(filepath="text_replacements.txt") -> dict:
    """
    Load key=value style replacements from a UTF-8 text file.

    Each line is stripped of surrounding whitespace, then:
      * blank lines and lines starting with '#' are ignored;
      * lines without '=' are ignored;
      * other lines are split on their FIRST '=', and the trimmed left and
        right parts become key and value (the value may contain '=');
      * lines whose key is empty (e.g. "=foo") are ignored.

    If a key occurs more than once, the last occurrence wins.
    Empty or missing file → return {}.
    """
    try:
        # utf-8-sig reads plain UTF-8 as well, but drops a leading BOM so it
        # cannot end up glued to the first key of the file.
        f = open(Path(filepath), "r", encoding="utf-8-sig")
    except (FileNotFoundError, NotADirectoryError):
        # The replacements file is optional: no file means no replacements.
        return {}

    repl = {}

    with f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            key, sep, val = line.partition("=")
            if not sep:
                continue

            key = key.strip()
            if not key:
                # An empty search string is never a meaningful replacement:
                # str.replace("", val) inserts val between every character.
                continue

            repl[key] = val.strip()

    return repl


# ------------------------------------------------------------
# Clean extracted HTML text
# ------------------------------------------------------------
def clean_text(raw: str, repl_dict: dict = None) -> str:
    """
    Normalize line endings, collapse blank lines, apply replacements.

    Steps, in order:
      1. Remove every carriage-return character (CRLF becomes LF).
      2. Collapse runs of 3+ newlines down to two, i.e. at most one
         empty line between paragraphs.
      3. Replace every occurrence of each repl_dict key with its value,
         in the dict's iteration order. Empty keys are skipped.
      4. Strip leading and trailing whitespace from the result.

    repl_dict is optional → treated as {} if none provided.
    """
    txt = raw.replace("\r", "")  # normalize CRLF

    # Collapse 3+ blank lines → max 1 empty line
    txt = re.sub(r"\n{3,}", "\n\n", txt)

    # Apply replacements
    for key, val in (repl_dict or {}).items():
        if not key:
            # str.replace("", val) would insert val between every character
            # of the text, so an empty key is ignored rather than applied.
            continue
        txt = txt.replace(key, val)

    return txt.strip()


# ------------------------------------------------------------
# Determine save path for a chapter (shared by download & save)
# ------------------------------------------------------------
def get_save_path(chapter_num: int, base_path: str) -> str:
    """
    Build the on-disk location for one chapter's text file.

    The chapter number is zero-padded to at least four digits and given a
    ".txt" extension (1 → 0001.txt, 2 → 0002.txt, ...); the resulting
    file name is then joined onto base_path.
    """
    padded_number = format(chapter_num, "04d")
    return os.path.join(base_path, padded_number + ".txt")