You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/scriptgen.py

148 lines
4.8 KiB

# scraper/scriptgen.py
# Generates scripts (allinone.txt, makebook.txt, say.txt)
# using external templates + dynamic merge generation.
import os
import stat
from logbus.publisher import log
from scraper.logger_decorators import logcall
# Directory that holds the script templates; resolved relative to the
# location of this module file so it does not depend on the working directory.
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")
# ------------------------------------------------------------
# Load a template file from scraper/templates/
# ------------------------------------------------------------
def load_template(name: str) -> str:
    """Return the text of the template file *name* from ``TEMPLATE_DIR``.

    The file is read as UTF-8. If no file exists at the resolved path, the
    problem is logged and an empty string is returned instead of raising.
    """
    template_path = os.path.join(TEMPLATE_DIR, name)

    if os.path.exists(template_path):
        with open(template_path, "r", encoding="utf-8") as handle:
            content = handle.read()
        return content

    # Missing template: report it and fall back to an empty string.
    log(f"[SCRIPTGEN] Template missing: {template_path}")
    return ""
# ------------------------------------------------------------
# Detect volumes (Volume_001, Volume_002, ...)
# ------------------------------------------------------------
def detect_volumes(book_base: str) -> list:
    """Find the numbered volume directories directly inside *book_base*.

    An entry counts as a volume when its name starts with ``volume_``
    (case-insensitive), it is a directory, and the text between the first
    and second underscore parses as an integer (``Volume_001`` -> ``1``).

    Returns:
        A list of ``(number, directory_name)`` tuples sorted ascending.

    Raises:
        OSError: propagated from ``os.listdir`` when *book_base* cannot
            be listed (e.g. it does not exist).
    """
    volumes = []
    for name in os.listdir(book_base):
        # Cheap string test first, so unrelated entries never cost a stat().
        if not name.lower().startswith("volume_"):
            continue
        if not os.path.isdir(os.path.join(book_base, name)):
            continue
        try:
            # The prefix check guarantees an underscore, so index 1 exists;
            # only the integer conversion can fail here.
            number = int(name.split("_")[1])
        except ValueError:
            # Not a numbered volume (e.g. "Volume_abc" or "Volume_"): skip it.
            continue
        volumes.append((number, name))
    volumes.sort()
    return volumes
# ------------------------------------------------------------
# Build the dynamic merge block
# ------------------------------------------------------------
# Characters that keep a special meaning inside a double-quoted POSIX shell
# string, mapped to their backslash-escaped form.
_SHELL_DQ_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        '"': '\\"',
        "$": "\\$",
        "`": "\\`",
    }
)


def _escape_double_quoted(value: str) -> str:
    """Escape *value* so it is taken literally inside a double-quoted shell string."""
    return value.translate(_SHELL_DQ_ESCAPES)


def build_merge_block(title: str, author: str, volumes):
    """Build the chained ``m4b-tool merge`` commands, one per volume.

    Args:
        title: Book title; ``None`` is treated as empty and surrounding
            whitespace is stripped.
        author: Author name; normalized the same way as *title*.
        volumes: Iterable of ``(number, directory_name)`` pairs.

    Returns:
        The commands joined with a line continuation followed by ``&&`` and
        terminated by a newline, or ``""`` when there are no volumes.

    Title, author and directory name are placed inside double-quoted shell
    arguments, so the characters that remain special there (backslash,
    double quote, dollar sign, backtick) are backslash-escaped. Values
    without those characters produce exactly the same text as before.
    """
    safe_title = _escape_double_quoted((title or "").strip())
    safe_author = _escape_double_quoted((author or "").strip())
    volumes = list(volumes)

    # Padding rule: always at least two digits (01, 02, ...); three digits
    # once there are 100 or more volumes.
    pad = 3 if len(volumes) >= 100 else 2

    commands = []
    for num, dirname in volumes:
        # The same zero-padded string is used for the output filename and
        # for the series-part tag.
        part = f"{num:0{pad}d}"
        commands.append(
            "m4b-tool merge --jobs=4 "
            f'--writer="{safe_author}" '
            f'--sortalbum="{safe_title}" '
            f'--albumartist="{safe_author}" '
            f'--album="{safe_title}" '
            f'--name="{safe_title}" '
            f'--series="{safe_title}" '
            f'--series-part="{part}" '
            f'--output-file="{safe_title}-{part}.m4b" '
            f'"{_escape_double_quoted(dirname)}" -vvv'
        )

    if not commands:
        return ""
    # Each command only runs if the previous one succeeded.
    return " \\\n&& ".join(commands) + "\n"
# ------------------------------------------------------------
# Main generator
# ------------------------------------------------------------
def _write_executable_script(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, add the owner-execute bit, and log it."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
    # Keep the existing permission bits and add execute for the owner.
    os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC)
    log(f"[SCRIPTGEN] Created {path}")


@logcall
def generate_all_scripts(book_base: str, title: str, author: str):
    """Generate ``allinone.txt``, ``makebook.txt`` and ``say.txt`` in *book_base*.

    Args:
        book_base: Directory that is scanned for volume directories and
            that receives the generated scripts.
        title: Book title; ``None`` is treated as empty, whitespace stripped.
        author: Author name; normalized the same way as *title*.

    File contents:
        * ``allinone.txt`` = say template + cleanup template + merge block
        * ``makebook.txt`` = merge block only
        * ``say.txt``      = say template + cleanup template

    Every file is written as UTF-8 and marked executable for its owner.
    """
    # Defensive normalization of the metadata.
    title = (title or "").strip()
    author = (author or "").strip()
    log(f"[SCRIPTGEN] Generating scripts in {book_base}")

    say_template = load_template("say.template")
    cleanup_template = load_template("cleanup.template")

    volumes = detect_volumes(book_base)
    log(f"[SCRIPTGEN] Volumes detected: {volumes}")
    merge_block = build_merge_block(title, author, volumes)

    say_and_cleanup = say_template + "\n" + cleanup_template

    # (filename, content) in the order the files are created and logged.
    scripts = (
        ("allinone.txt", say_and_cleanup + "\n" + merge_block),
        ("makebook.txt", merge_block),
        ("say.txt", say_and_cleanup),
    )
    for filename, content in scripts:
        _write_executable_script(os.path.join(book_base, filename), content)

    log(f"[SCRIPTGEN] All scripts generated successfully for '{title}'")
# Public API: only the top-level generator is exported by a star-import of
# this module; the other functions remain importable by explicit name.
__all__ = ["generate_all_scripts"]