# ============================================================
# File: scraper/tasks/m4b_tasks.py
# ============================================================

import os
import re
import subprocess
from typing import List

from celery_app import celery_app
from logbus.publisher import log
from scraper.logger_decorators import logcall

from db.repository import fetch_book, store_m4b_error
from scraper.scriptgen import build_merge_block


# ------------------------------------------------------------
# Helper: detect volumes
# ------------------------------------------------------------
def _natural_key(name: str) -> list:
    """Return a sort key that orders embedded digit runs numerically.

    ``re.split`` with a capturing group always yields alternating
    text / digit chunks starting with a (possibly empty) text chunk, so
    odd positions are guaranteed to be pure digit runs and the resulting
    lists only ever compare str with str and int with int.
    """
    chunks = re.split(r"(\d+)", name)
    return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]


def detect_volumes(book_base: str) -> List[str]:
    """Return the names of the ``volume_*`` sub-directories of *book_base*.

    Only directories whose name starts with ``volume_`` (case-insensitive)
    are returned. Names are ordered naturally, so ``volume_2`` precedes
    ``volume_10``; zero-padded names keep their plain lexicographic order.

    Raises:
        OSError: propagated from ``os.listdir`` if *book_base* cannot be
            listed.
    """
    volumes = [
        name
        for name in os.listdir(book_base)
        if name.lower().startswith("volume_")
        and os.path.isdir(os.path.join(book_base, name))
    ]
    # The raw name is the tie-breaker so that e.g. "volume_1" and
    # "volume_01" still sort deterministically.
    volumes.sort(key=lambda name: (_natural_key(name), name))
    return volumes


# ------------------------------------------------------------
# Celery task
# ------------------------------------------------------------
@celery_app.task(bind=True, queue="m4b", ignore_result=True)
@logcall
def run_m4btool(self, book_idx: str):
    """Run the generated merge command for every volume of a book.

    The book is looked up with ``fetch_book``; its directory is
    ``$BOOKSCRAPER_OUTPUT_DIR/<title>`` (default root: ``output``).
    Each detected volume that contains an ``Audio`` directory gets its
    command executed with the book directory as working directory.
    A failing command is logged and recorded through ``store_m4b_error``;
    the remaining volumes are still processed.

    Args:
        book_idx: Identifier passed to ``fetch_book`` and recorded with
            any stored error.

    Returns:
        None. The task returns early (after logging) when the book, its
        directory, or its volumes cannot be found.
    """
    log(f"[M4B] START book_idx={book_idx}")

    book = fetch_book(book_idx)
    if not book:
        log(f"[M4B] Book not found in SQL: book_idx={book_idx}")
        return

    title = book.get("title", book_idx)
    author = book.get("author", "Unknown")

    output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    book_base = os.path.join(output_root, title)
    log(f"[M4B] Book base directory: {book_base}")

    if not os.path.isdir(book_base):
        log(f"[M4B] Book directory missing: {book_base}")
        return

    volumes = detect_volumes(book_base)
    if not volumes:
        log(f"[M4B] No volumes found for book_idx={book_idx}")
        return

    log(f"[M4B] Volumes detected: {volumes}")

    # --------------------------------------------------------
    # Build canonical commands via scriptgen
    # --------------------------------------------------------
    merge_block = build_merge_block(
        title,
        author,
        [(i + 1, v) for i, v in enumerate(volumes)],
    )
    # The block is treated as "&&"-joined commands, one per volume.
    # NOTE(review): this split is only safe if no command argument can
    # itself contain "&&" -- confirm against scriptgen.
    commands = [c.strip() for c in merge_block.split("&&") if c.strip()]

    # zip() below silently truncates to the shorter sequence; surface a
    # mismatch so dropped or misaligned volumes do not go unnoticed.
    if len(commands) != len(volumes):
        log(
            f"[M4B][WARN] command/volume count mismatch: "
            f"{len(commands)} commands for {len(volumes)} volumes"
        )

    for volume, cmd in zip(volumes, commands):
        audio_dir = os.path.join(book_base, volume, "Audio")
        if not os.path.isdir(audio_dir):
            log(f"[M4B] SKIP {volume}: no Audio directory")
            continue

        log(f"[M4B] Running for volume={volume}")
        log(f"[M4B] CMD: {cmd}")

        try:
            # NOTE(review): shell=True executes a string that embeds the
            # book title/author. This is only safe if build_merge_block
            # shell-quotes those values -- confirm, or switch to an
            # argument list with shell=False.
            result = subprocess.run(
                cmd,
                cwd=book_base,
                shell=True,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as exc:
            log(f"[M4B][FAILED] volume={volume}")
            if exc.stdout:
                log(f"[M4B][STDOUT] {exc.stdout}")
            if exc.stderr:
                log(f"[M4B][STDERR] {exc.stderr}")
            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=exc.stderr or str(exc),
            )
        except Exception as exc:
            # Task boundary: record the failure and move on to the next
            # volume instead of aborting the whole book.
            log(f"[M4B][UNEXPECTED ERROR] volume={volume}: {exc}")
            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=str(exc),
            )
        else:
            if result.stdout:
                log(f"[M4B][STDOUT] {result.stdout}")

    log(f"[M4B] FINISHED book_idx={book_idx}")


# ------------------------------------------------------------
# Orchestration helper (UNCHANGED)
# ------------------------------------------------------------
@logcall
def queue_m4b_for_book(book_idx: str):
    """Send a ``run_m4btool`` task for *book_idx* to the ``m4b`` queue.

    The task is dispatched by name through ``celery_app.send_task``.
    """
    log(f"[M4B] Queuing m4b-tool for book_idx={book_idx}")
    celery_app.send_task(
        "scraper.tasks.m4b_tasks.run_m4btool",
        args=[book_idx],
        queue="m4b",
    )