Scriptgen template loading + minor controller log improvements

feat/audiotasks
peter.fong 2 weeks ago
parent 9a774c4955
commit 6154b396e3

@ -4,10 +4,12 @@
# Build Celery pipelines for all chapters
# and pass book_id for abort/progress/log functionality.
# + Download and replicate cover image to all volume folders
# + Generate scripts (allinone.txt, makebook.txt, say.txt)
# =========================================================
from celery import group
from scraper.tasks.pipeline import build_chapter_pipeline
from scraper.scriptgen import generate_all_scripts # <-- ADDED
from logbus.publisher import log
import os
import requests
@ -22,6 +24,7 @@ class DownloadController:
- consistent meta propagation
- book_id-based abort + progress tracking
- cover download + volume replication
- script generation (allinone.txt, makebook.txt, say.txt)
"""
def __init__(self, book_id: str, scrape_result: dict):
@ -62,7 +65,6 @@ class DownloadController:
cover_path = os.path.join(self.book_base, "cover.jpg")
# HEADERS that bypass 403 hotlink protection
headers = {
"User-Agent": (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:118.0) "
@ -145,7 +147,7 @@ class DownloadController:
tasks.append(
build_chapter_pipeline(
self.book_id, # UUID from scraping.py
self.book_id, # UUID
chapter_num,
chapter_url,
volume_path,
@ -165,4 +167,17 @@ class DownloadController:
# -------------------------------------------------------
self.replicate_cover_to_volumes()
# -------------------------------------------------------
# 3) Generate scripts (allinone, makebook, say)
# -------------------------------------------------------
try:
generate_all_scripts(
self.book_base,
self.title,
self.meta.get("author"),
)
log(f"[CTRL] Scripts generated for '{self.title}'")
except Exception as e:
log(f"[CTRL] Script generation failed: {e}")
return async_result

@ -0,0 +1,112 @@
# scraper/scriptgen.py
# Generates scripts (allinone.txt, makebook.txt, say.txt)
# using external templates + dynamic merge generation.
import os
import stat
from logbus.publisher import log
# Directory holding the external script templates (scraper/templates/).
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")


# ------------------------------------------------------------
# Load a template file from scraper/templates/
# ------------------------------------------------------------
def load_template(name: str) -> str:
    """Return the contents of template *name*, or "" if it is missing.

    A missing template is logged and treated as empty so that script
    generation can continue with whatever templates do exist.
    """
    path = os.path.join(TEMPLATE_DIR, name)
    # EAFP: open directly instead of an exists() pre-check, which is
    # racy (the file could disappear between the check and the open).
    # Other I/O errors (e.g. permissions) still propagate, as before.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        log(f"[SCRIPTGEN] Template missing: {path}")
        return ""
# ------------------------------------------------------------
# Detect volumes (Volume_001, Volume_002, ...)
# ------------------------------------------------------------
def detect_volumes(book_base: str):
    """Return the sorted volume numbers found directly under *book_base*.

    A volume is any directory named ``Volume_<n>`` (prefix matched
    case-insensitively). Entries whose suffix is not an integer are
    skipped silently.
    """
    numbers = []
    for entry in os.listdir(book_base):
        full = os.path.join(book_base, entry)
        if not (os.path.isdir(full) and entry.lower().startswith("volume_")):
            continue
        try:
            numbers.append(int(entry.split("_")[1]))
        except (ValueError, IndexError):
            # Non-numeric suffix (e.g. "Volume_extra") — not a volume.
            continue
    return sorted(numbers)
# ------------------------------------------------------------
# Build the dynamic merge block
# ------------------------------------------------------------
def build_merge_block(title: str, author: str, volumes):
    """Build the chained ``m4b-tool merge`` command block, one per volume.

    Returns "" when *volumes* is empty; otherwise the commands joined
    with shell line continuations and ``&&``, ending in a newline.

    NOTE(review): each command uses the bare volume number as the input
    directory (``"{vol}"``) — confirm whether the Volume_### folder name
    was intended instead.
    """
    if not volumes:
        return ""
    commands = [
        f'm4b-tool merge --jobs=4 --writer="{author}" '
        f'--albumartist="{author}" --album="{title}" '
        f'--name="{title}" --output-file="{title}-{vol}.m4b" '
        f'"{vol}" -vvv'
        for vol in volumes
    ]
    return " \\\n&& ".join(commands) + "\n"
# ------------------------------------------------------------
# Main generator
# ------------------------------------------------------------
def _write_executable_script(path: str, content: str) -> None:
    """Write *content* to *path* and add the owner-execute bit."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC)
    log(f"[SCRIPTGEN] Created {path}")


def generate_all_scripts(book_base: str, title: str, author: str):
    """Generate allinone.txt, makebook.txt and say.txt inside *book_base*.

    - allinone.txt = say template + cleanup template + merge commands
    - makebook.txt = merge commands only
    - say.txt      = say template + cleanup template

    *author* may be None when book metadata has no author — it is
    interpolated into the merge commands as-is (TODO confirm upstream
    guarantees a string).
    """
    log(f"[SCRIPTGEN] Generating scripts in {book_base}")

    # Load templates (missing templates come back as "").
    say_template = load_template("say.template")
    cleanup_template = load_template("cleanup.template")

    volumes = detect_volumes(book_base)
    log(f"[SCRIPTGEN] Volumes detected: {volumes}")
    merge_block = build_merge_block(title, author, volumes)

    # allinone.txt = say + cleanup + merge
    _write_executable_script(
        os.path.join(book_base, "allinone.txt"),
        say_template + "\n" + cleanup_template + "\n" + merge_block,
    )
    # makebook.txt = merge only
    _write_executable_script(
        os.path.join(book_base, "makebook.txt"),
        merge_block,
    )
    # say.txt = say + cleanup (no merge step)
    _write_executable_script(
        os.path.join(book_base, "say.txt"),
        say_template + "\n" + cleanup_template,
    )

    log(f"[SCRIPTGEN] All scripts generated successfully for '{title}'")


__all__ = ["generate_all_scripts"]

@ -32,20 +32,10 @@ print(">>> [IMPORT] download_tasks.py loaded")
# TIMESTAMPED LOG WRAPPER
# -----------------------------------------------------------
def log_msg(book_id: str, message: str):
"""
Log with compact timestamp + book_id.
Pushes to:
- console (publisher.log)
- GUI Redis (push_ui)
"""
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
full = f"{ts} [{book_id}] {message}"
# console
log(full)
# GUI (Redis rolling list)
push_ui(full) # NO book_id param — ui_log is DOM
push_ui(full)
# -----------------------------------------------------------
@ -68,12 +58,12 @@ GLOBAL_DELAY = int(os.getenv("DOWNLOAD_GLOBAL_MIN_DELAY", "1"))
DELAY_KEY = "download:delay_lock"
# -----------------------------------------------------------
# Redis connection
# Redis
# -----------------------------------------------------------
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
redis_client = redis.Redis.from_url(REDIS_URL)
SEM_KEY = "download:active" # semaphore counter
SEM_KEY = "download:active"
# ============================================================
@ -145,21 +135,15 @@ def download_chapter(
"abort": True,
}
# Mark started — ensures parse/save must run
# Mark started
mark_chapter_started(book_id, chapter_num)
# Hard delay
if GLOBAL_DELAY > 0:
time.sleep(GLOBAL_DELAY)
save_path = get_save_path(chapter_num, base_path)
# -----------------------------------------------------------
# SKIP existing
# NEW POSITION FOR SKIP BLOCK (before any delay logic)
# -----------------------------------------------------------
save_path = get_save_path(chapter_num, base_path)
if os.path.exists(save_path):
wait_for_global_delay()
set_global_delay()
log_msg(book_id, f"[DL] SKIP {chapter_num} (exists) → {save_path}")
return {
"chapter": chapter_num,
@ -169,6 +153,12 @@ def download_chapter(
"path": save_path,
}
# -----------------------------------------------------------
# Hard delay (only for real downloads)
# -----------------------------------------------------------
if GLOBAL_DELAY > 0:
time.sleep(GLOBAL_DELAY)
# Sync delay
wait_for_global_delay()
@ -207,10 +197,7 @@ def download_chapter(
delay = BASE_DELAY * (BACKOFF**attempt)
# 429 hard block
if (
hasattr(exc, "response")
and getattr(exc.response, "status_code", None) == 429
):
if getattr(getattr(exc, "response", None), "status_code", None) == 429:
log_msg(
book_id,
f"[DL] 429 {chapter_num} → WAIT {DELAY_429}s "

@ -0,0 +1,44 @@
#!/bin/bash
# Convert every chapter .txt in each subfolder to an .m4b audio file via
# the macOS `say` TTS, writing results into <subfolder>/Audio.
# NOTE: the script uses bash features (shopt, pattern matching), so the
# shebang must be bash — the original `#!/bin/sh` would fail on shopt.
main_dir="$( cd "$( dirname "$0" )" && pwd )"
shopt -s nocasematch  # case-insensitive matching

for subfolder in "$main_dir"/*; do
    if [ -d "$subfolder" ]; then
        audiofolder="$subfolder/Audio"
        mkdir -p "$audiofolder"
        for entry in "$subfolder"/*.txt; do
            # The glob may match nothing; skip the literal "*.txt".
            [ -e "$entry" ] || continue
            fn=$(basename "$entry")
            echo "$fn"
            inputfile="$subfolder/$fn"
            outputfile="$audiofolder/${fn%.*}.m4b"
            now=$(date +"%T")
            echo "Current time : $now"
            echo "$inputfile ->"
            echo "$outputfile"
            # Quote paths: chapter titles routinely contain spaces.
            if [ -f "$outputfile" ]; then
                echo "$outputfile exists: skipping"
            else
                say --voice=Sinji \
                    --output-file="$outputfile" \
                    --input-file="$inputfile" \
                    --file-format=m4bf \
                    --quality=127 \
                    -r 200 \
                    --data-format=aac
            fi
        done
    fi
done
# CLEANUP WILL BE APPENDED BY scriptgen.py

@ -0,0 +1,4 @@
# Delete undersized .m4b files (< 580 bytes): these are failed or empty
# `say` renders that would otherwise break the merge step.
# IFS=/-r keep filenames with spaces or backslashes intact; the inner
# quotes must be plain (escaped quotes inside $() would be passed to ls
# as literal characters and fail on every file).
find . -name "*.m4b" -size -580c | while IFS= read -r fname; do
    echo "deleting $(ls -lah "$fname")"
    rm "$fname"
done
Loading…
Cancel
Save