# =========================================================
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk.
#
# Abort Behavior:
#   - Save MUST ALWAYS RUN once download has started.
#   - Abort only prevents new chapters from starting (download skip).
#   - Save is skipped ONLY when download/parse indicated "skipped".
#
# This guarantees no half-written chapters.
# =========================================================

print(">>> [IMPORT] save_tasks.py loaded")

import os

from celery import shared_task

from logbus.publisher import log
from scraper.utils import get_save_path


def _write_text_atomically(path, text):
    """Write *text* to *path* via a sibling temp file and an atomic rename.

    The content is first written to ``<path>.<pid>.tmp`` in the same
    directory and then moved over *path* with ``os.replace``. A failure while
    writing therefore never leaves a truncated file at *path*; the temp file
    is removed (best effort) and the original exception propagates.
    """
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(text)
        os.replace(tmp_path, path)
    except BaseException:
        # Best-effort cleanup only: the write error is the one worth
        # reporting, so a failed removal must not mask it.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str):
    """Persist the text of one parsed chapter under *base_path*.

    Args:
        parsed: Payload from the previous pipeline step. Keys read here:
            ``"skipped"``, ``"chapter"``, ``"path"``, ``"url"`` and ``"text"``.
        base_path: Directory for the chapter file; created if missing.

    Returns:
        ``{"chapter", "path", "skipped": True}`` when the payload is flagged
        as skipped, otherwise ``{"chapter", "path"}`` for the written file.

    Raises:
        ValueError: If the payload carries no (truthy) chapter number.
        Exception: Any error raised while creating the directory, resolving
            the path or writing the file is logged and re-raised.
    """
    print(f">>> [save_tasks] save_chapter() CALLED for chapter {parsed.get('chapter')}")

    # ------------------------------------------------------------
    # SKIP CASE:
    #   - Only skip when download OR parse indicated skip
    #   - NOT related to abort (abort never skips parse/save)
    # ------------------------------------------------------------
    if parsed.get("skipped"):
        chapter = parsed.get("chapter")
        path = parsed.get("path")
        log(f"[SAVE] SKIP chapter {chapter} (already exists or skipped) → {path}")
        print(f">>> [save_tasks] SKIPPED {path}")
        return {"chapter": chapter, "path": path, "skipped": True}

    # Bound before the try block so the error handler can always reference it.
    url = parsed.get("url")

    try:
        chapter_number = parsed.get("chapter")
        text = parsed.get("text", "")

        # NOTE(review): this truthiness check also rejects chapter 0 —
        # confirm that chapter numbering always starts at 1.
        if not chapter_number:
            raise ValueError("Missing chapter_number in parsed payload")

        # Ensure base path exists
        os.makedirs(base_path, exist_ok=True)

        # Unified filename logic
        path = get_save_path(chapter_number, base_path)

        # ------------------------------------------------------------
        # WRITE CHAPTER TEXT TO FILE
        # (temp file + rename, so an interrupted write never leaves a
        #  truncated chapter at the final path)
        # ------------------------------------------------------------
        _write_text_atomically(path, text)

        log(f"[SAVE] Saved chapter {chapter_number} → {path}")
        print(f">>> [save_tasks] SAVED {path}")

        return {"chapter": chapter_number, "path": path}

    except Exception as exc:
        log(f"[SAVE] ERROR saving chapter from {url}: {exc}")
        print(f">>> [save_tasks] ERROR: {exc}")
        raise