# ============================================================
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk + trigger audio.
# Updated for chapter_dict + book_meta pipeline model.
# ============================================================

# Import trace; intentionally emitted before the imports below, as in the
# original module, so it shows up even if a later import fails.
print(">>> [IMPORT] save_tasks.py loaded")

import os

from celery import shared_task

from scraper.utils import get_save_path
from scraper.tasks.download_tasks import log_msg  # unified logger
from scraper.progress import (
    inc_completed,
    inc_chapter_done,
    inc_chapter_download_skipped,
)
from scraper.tasks.audio_tasks import generate_audio


def _queue_audio(
    book_id,
    volume_name,
    chapter_num,
    chapter_title,
    path,
    *,
    skipped=False,
):
    """Queue the audio task for one chapter on a best-effort basis.

    Calls ``generate_audio.delay`` and logs the outcome through ``log_msg``.
    An exception raised while queueing (or while writing the success log
    line) is caught and reported as an ``[AUDIO] ERROR`` log line instead of
    propagating, so a queueing problem does not fail the save task.

    Args:
        book_id: Identifier passed to the logger and to the audio task.
        volume_name: Base name of the volume directory.
        chapter_num: Chapter number taken from the chapter dict.
        chapter_title: Chapter title (or the "Chapter N" fallback).
        path: Path of the chapter text file on disk.
        skipped: When true, the log lines carry the " (SKIPPED)" marker used
            for chapters whose download was skipped.
    """
    tag = " (SKIPPED)" if skipped else ""
    try:
        generate_audio.delay(
            book_id,
            volume_name,
            chapter_num,
            chapter_title,
            path,
        )
        log_msg(
            book_id,
            f"[AUDIO] Task queued{tag} for chapter {chapter_num} in {volume_name}",
        )
    except Exception as audio_exc:
        # Deliberate best-effort: report the failure, keep the save result.
        log_msg(
            book_id,
            f"[AUDIO] ERROR queueing{tag} chapter {chapter_num}: {audio_exc}",
        )


@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict) -> dict:
    """Write a parsed chapter to disk and queue its audio generation.

    New pipeline model:

        parsed = {
            "book_id": str,
            "chapter": chapter_dict,
            "text": str,
            "length": int,
            "book_meta": dict,
            "skipped": bool,
            "path": optional str (if skipped)
        }

    Skipped chapters are not written: the skip counter is incremented and
    audio is queued only when ``parsed["path"]`` points at an existing file.
    Otherwise the volume directory is created if needed, the text is written
    as UTF-8 to the path returned by ``get_save_path``, the progress counters
    are incremented and audio is queued.

    Args:
        parsed: Payload described above. ``chapter["num"]`` and
            ``chapter["volume_path"]`` are required.

    Returns:
        A dict with ``book_id``, ``chapter``, ``path`` and ``book_meta``;
        the skipped case additionally carries ``"skipped": True``.

    Raises:
        ValueError: If the chapter dict lacks ``num`` or ``volume_path``.
        Exception: Any error raised in the normal save path is logged as a
            ``[SAVE] ERROR`` line and re-raised unchanged.
    """
    book_id = parsed.get("book_id", "NOBOOK")
    chapter_dict = parsed.get("chapter") or {}
    book_meta = parsed.get("book_meta") or {}

    chapter_num = chapter_dict.get("num")
    chapter_title = chapter_dict.get("title") or f"Chapter {chapter_num}"
    volume_path = chapter_dict.get("volume_path")

    # ------------------------------------------------------------
    # VALIDATION
    # ------------------------------------------------------------
    if chapter_num is None or volume_path is None:
        raise ValueError("Invalid parsed payload: chapter_dict missing fields.")

    # ------------------------------------------------------------
    # SKIPPED CASE
    # ------------------------------------------------------------
    if parsed.get("skipped"):
        path = parsed.get("path", None)
        log_msg(book_id, f"[SAVE] SKIP chapter {chapter_num} → {path}")

        inc_chapter_download_skipped(book_id)

        volume_name = os.path.basename(volume_path.rstrip("/"))

        # Queue audio only if a valid file exists
        if path and os.path.exists(path):
            _queue_audio(
                book_id,
                volume_name,
                chapter_num,
                chapter_title,
                path,
                skipped=True,
            )

        return {
            "book_id": book_id,
            "chapter": chapter_dict,
            "path": path,
            "skipped": True,
            "book_meta": book_meta,
        }

    # ------------------------------------------------------------
    # NORMAL SAVE CASE
    # ------------------------------------------------------------
    try:
        text = parsed.get("text", "")

        # Ensure volume folder exists
        os.makedirs(volume_path, exist_ok=True)

        # Build final chapter file path
        path = get_save_path(chapter_num, volume_path)

        # Write chapter text to file
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

        log_msg(book_id, f"[SAVE] Saved chapter {chapter_num} → {path}")

        inc_chapter_done(book_id)
        inc_completed(book_id)

        # Determine volume name
        volume_name = os.path.basename(volume_path.rstrip("/"))

        # Queue audio task (best-effort; failures are logged, not raised)
        _queue_audio(
            book_id,
            volume_name,
            chapter_num,
            chapter_title,
            path,
        )

        return {
            "book_id": book_id,
            "chapter": chapter_dict,
            "path": path,
            "book_meta": book_meta,
        }

    except Exception as exc:
        log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter_num}: {exc}")
        raise