From c711c5356de911b0b8a6730233c99072c7069f56 Mon Sep 17 00:00:00 2001
From: "peter.fong"
Date: Sat, 29 Nov 2025 21:13:12 +0000
Subject: [PATCH] make_scripts toegvoegd

---
 bookscraper/scraper/book_scraper.py | 170 +++++++++++++++++++++++++++-
 1 file changed, 169 insertions(+), 1 deletion(-)

diff --git a/bookscraper/scraper/book_scraper.py b/bookscraper/scraper/book_scraper.py
index 83b0348..283b556 100644
--- a/bookscraper/scraper/book_scraper.py
+++ b/bookscraper/scraper/book_scraper.py
@@ -85,6 +85,8 @@ class BookScraper:
         else:
             self.download_all()
 
+        self.prepare_scripts(self.base_path)
+
         return {"title": self.book_title}
 
     # ------------------------------------------------------------
@@ -345,7 +347,17 @@ class BookScraper:
                 time.sleep(wait)
                 attempt += 1
 
-        fname.write_text(ch.text, encoding="utf-8")
+        if ch.number == 1:
+            header = (
+                f"Description:\n{self.book_description}\n"
+                f" {ch.url}\n"
+                "----------------------------------------\n\n"
+            )
+            content = header + ch.text
+        else:
+            content = ch.text
+
+        fname.write_text(content, encoding="utf-8")
         log_debug(f"Saved chapter to v{volume}: {fname}")
         chapter_delay = float(os.getenv("CHAPTER_DELAY", "2"))
         log_debug(f"Throttling {chapter_delay}s before next chapter")
@@ -389,3 +401,159 @@ class BookScraper:
         raw = "\n".join(parts)
         raw = clean_text(raw, self.site.replacements)
         return raw.strip()
+
+    # ------------------------------------------------------------
+    # SCRIPT GENERATOR (C# prepareScripts equivalent)
+    # ------------------------------------------------------------
+
+    def prepare_scripts(self, path: Path):
+        """Write the helper shell scripts that turn the scraped text into audio.
+
+        Three files are written into ``path``:
+
+        * ``say.txt``: runs ``say`` on every ``*.txt`` file of each subfolder
+          and stores the result in that subfolder's ``Audio`` directory.
+        * ``makebook.txt``: one ``m4b-tool merge`` command per volume folder,
+          followed by a loop that moves the merged ``*.m4b`` files one level up.
+        * ``allinone.txt``: the TTS script followed by the merge script.
+
+        Args:
+            path: Directory that contains the volume folders (``v1``, ``v2``, ...).
+        """
+        # Local imports: only this method needs them.
+        import shlex
+        import textwrap
+
+        log_debug("Preparing scripts...")
+
+        # Only volume folders (v1, v2, v3, ...), sorted by volume number so that
+        # v10 comes after v9 instead of right after v1.
+        dirs = sorted(
+            (
+                d for d in path.iterdir()
+                if d.is_dir()
+                and d.name.startswith("v")
+                and d.name[1:].isdecimal()
+            ),
+            key=lambda d: int(d.name[1:]),
+        )
+
+        # --------------------------------------------------------
+        # M4B MERGE COMMANDS
+        # --------------------------------------------------------
+        # Title and author come from scraped pages: shell-quote them so spaces,
+        # quotes, "$" or backticks cannot break (or inject into) the script.
+        author = shlex.quote(str(self.book_author))
+        title = shlex.quote(str(self.book_title))
+
+        m4b_commands = []
+        for d in dirs:
+            volname = d.name
+            output_file = shlex.quote(f"{self.book_title}-{volname}.m4b")
+            cmd = (
+                'm4b-tool merge --jobs=4 '
+                f'--writer={author} '
+                f'--albumartist={author} '
+                f'--album={title} '
+                f'--name={title} '
+                f'--output-file={output_file} "./{volname}" -vvv'
+            )
+            m4b_commands.append(cmd)
+
+        # Chain with "&&" so a failed merge stops the remaining ones.
+        m4b_joined = " \\\n && ".join(m4b_commands) + "\n\n"
+
+        # "IFS= read -r" keeps backslashes and leading spaces in file names intact.
+        move_script = textwrap.dedent(r'''
+            find ./ -maxdepth 1 -name "*.m4b" | while IFS= read -r fname; do
+                echo "moving $(ls -lah "$fname")"
+                mv "$fname" ../
+            done
+        ''').strip() + "\n"
+
+        m4b_joined += move_script
+
+        # --------------------------------------------------------
+        # SAY TTS SCRIPT (identical for say.txt and allinone.txt)
+        # --------------------------------------------------------
+        # dedent() + lstrip() put the shebang on the very first line.
+        say_script = textwrap.dedent(r'''
+            #!/usr/bin/env bash
+
+            set -euo pipefail
+
+            main_dir="$( cd "$( dirname "$0" )" && pwd )"
+
+            shopt -s nocasematch # case-insensitive matching
+
+            echo "=== TTS START ==="
+            echo "Main directory: $main_dir"
+            echo
+
+            for subfolder in "$main_dir"/*; do
+                if [[ -d "$subfolder" ]]; then
+
+                    audiofolder="$subfolder/Audio"
+                    mkdir -p "$audiofolder"
+
+                    for entry in "$subfolder"/*.txt; do
+                        [[ -f "$entry" ]] || continue
+
+                        fn=$(basename "$entry")
+                        inputfile="$entry"
+                        outputfile="$audiofolder/${fn%.*}.m4b"
+
+                        now=$(date +"%T")
+
+                        echo "[$now] Processing $fn"
+                        echo "Input : $inputfile"
+                        echo "Output: $outputfile"
+
+                        if [[ -f "$outputfile" ]]; then
+                            echo "[$now] EXISTS — skipping"
+                        else
+                            say --voice=Sinji \
+                                --output-file="$outputfile" \
+                                --input-file="$inputfile" \
+                                --file-format=m4bf \
+                                --quality=127 \
+                                -r 200 \
+                                --data-format=aac
+                        fi
+
+                        echo
+                    done
+                fi
+            done
+
+            echo "=== CLEANUP TINY FILES ==="
+            find "$main_dir" -name "*.m4b" -size -580c | while read -r fname; do
+                echo "Deleting: $(ls -lah "$fname")"
+                rm "$fname"
+            done
+            echo
+        ''').lstrip("\n")
+
+        # --------------------------------------------------------
+        # WRITE FILES
+        # --------------------------------------------------------
+
+        # allinone.txt = TTS + merge
+        (path / "allinone.txt").write_text(
+            say_script + "\n" + m4b_joined,
+            encoding="utf-8"
+        )
+
+        # makebook.txt = merge only
+        (path / "makebook.txt").write_text(
+            m4b_joined,
+            encoding="utf-8"
+        )
+
+        # say.txt = TTS only
+        (path / "say.txt").write_text(
+            say_script,
+            encoding="utf-8"
+        )
+
+        log_debug("Script files written: allinone.txt, makebook.txt, say.txt")