# ============================================================
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk + trigger audio.
# ============================================================

print(">>> [IMPORT] save_tasks.py loaded")

from celery import shared_task
import os

from scraper.utils import get_save_path
from scraper.tasks.download_tasks import log_msg  # unified logger
from scraper.progress import (
    inc_completed,
    inc_skipped,
    inc_failed,
    add_failed_chapter,
)
from scraper.tasks.audio_tasks import generate_audio


def _volume_name(base_path: str) -> str:
    """Return the last path component of *base_path*, ignoring trailing "/"."""
    return os.path.basename(base_path.rstrip("/"))


def _queue_audio(book_id, volume_name, chapter, path, skipped=False) -> None:
    """Queue the audio task for one chapter without ever raising.

    Any exception raised while queueing (or while logging the success
    message) is caught and logged, so a queueing problem does not change
    the outcome of the save itself.

    Args:
        book_id: Book identifier, used for logging and passed to the task.
        volume_name: Volume name passed to the audio task.
        chapter: Chapter number passed to the audio task.
        path: Path of the chapter text file passed to the audio task.
        skipped: When True, log messages carry the " (SKIPPED)" marker.
    """
    tag = " (SKIPPED)" if skipped else ""
    try:
        generate_audio.delay(
            book_id,
            volume_name,
            chapter,
            f"Chapter {chapter}",
            path,
        )
        log_msg(
            book_id,
            f"[AUDIO] Task queued{tag} for chapter {chapter} in {volume_name}",
        )
    except Exception as audio_exc:
        log_msg(
            book_id,
            f"[AUDIO] ERROR queueing{tag} chapter {chapter}: {audio_exc}",
        )


@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str):
    """
    Save parsed chapter text to disk and queue audio generation.

    Keys read from ``parsed``:
        "book_id": str   - defaults to "NOBOOK" when absent
        "chapter": int   - required unless the chapter is skipped
        "text":    str   - chapter text, defaults to ""
        "skipped": bool  - truthy when an earlier step skipped the chapter
        "path":    str   - only read for skipped chapters, defaults to
                           "(no-path)"

    Args:
        parsed: Payload produced by the preceding pipeline step.
        base_path: Directory the chapter file is written to; its last
            component is used as the volume name.

    Returns:
        Skipped chapter: {"book_id", "chapter", "path", "skipped": True}.
        Saved chapter:   {"book_id", "chapter", "path"}.
        Failed save:     None. The error is logged and counted as failed;
                         it is not re-raised.
    """
    book_id = parsed.get("book_id", "NOBOOK")
    chapter = parsed.get("chapter")

    # ------------------------------------------------------------
    # SKIP CASE (download or parse skipped the chapter)
    # ------------------------------------------------------------
    if parsed.get("skipped"):
        # The payload is expected to carry the path of the existing file;
        # "(no-path)" is only a placeholder when it does not.
        path = parsed.get("path", "(no-path)")
        log_msg(book_id, f"[SAVE] SKIP chapter {chapter} → {path}")
        inc_skipped(book_id)

        # Queue audio using the existing saved file
        _queue_audio(
            book_id, _volume_name(base_path), chapter, path, skipped=True
        )

        return {
            "book_id": book_id,
            "chapter": chapter,
            "path": path,
            "skipped": True,
        }

    # ------------------------------------------------------------
    # NORMAL SAVE CASE
    # ------------------------------------------------------------
    try:
        text = parsed.get("text", "")

        if chapter is None:
            raise ValueError("Missing chapter number in parsed payload")

        # Ensure chapter folder exists
        os.makedirs(base_path, exist_ok=True)

        # Build chapter file path
        path = get_save_path(chapter, base_path)

        # Save chapter text to disk
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

        log_msg(book_id, f"[SAVE] Saved chapter {chapter} → {path}")
        inc_completed(book_id)

        # Queue audio task (always use the saved file path)
        _queue_audio(book_id, _volume_name(base_path), chapter, path)

        return {"book_id": book_id, "chapter": chapter, "path": path}

    except Exception as exc:
        log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter}: {exc}")

        # Count the failure so progress tracking reflects chapters that were
        # neither saved nor skipped. Guarded so that a progress-store problem
        # cannot turn this best-effort handler into a crash.
        try:
            inc_failed(book_id)
        except Exception as progress_exc:
            log_msg(
                book_id,
                f"[SAVE] ERROR recording failure for chapter {chapter}: "
                f"{progress_exc}",
            )
        # NOTE(review): add_failed_chapter is imported but its signature is
        # not visible in this file - confirm it and record the chapter here.

        return None