# =========================================================
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk.
# =========================================================

# Import-time trace; emitted before the imports below so it is printed even
# if one of them fails.
print(">>> [IMPORT] save_tasks.py loaded")

from celery import shared_task
import os

from scraper.utils import get_save_path
from scraper.tasks.download_tasks import log_msg  # unified logger

from scraper.progress import (
    inc_completed,
    inc_skipped,
    inc_failed,
    add_failed_chapter,  # records which chapter failed and why
)


@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str) -> dict:
    """
    Save parsed chapter text to disk.

    Args:
        parsed: Payload from the previous pipeline stage. Keys read here:
            "book_id" (defaults to "NOBOOK"), "chapter", "text",
            "skipped" and, for skipped chapters, "path". The payload may
            carry other keys (e.g. "url"); they are not used by this task.
        base_path: Directory the chapter file is written into; created
            if it does not exist.

    Returns:
        For a skipped chapter:
            {"chapter": ..., "path": ..., "skipped": True}
        For a saved chapter:
            {"book_id": ..., "chapter": ..., "path": ...}

    Raises:
        ValueError: The payload has no chapter number.
        TypeError: The payload's "text" is not a string.
        Exception: Any other error raised while creating the folder or
            writing the file. Every failure is logged and counted via
            ``inc_failed`` / ``add_failed_chapter`` before being re-raised.
    """
    book_id = parsed.get("book_id", "NOBOOK")
    chapter = parsed.get("chapter")

    # ------------------------------------------------------------
    # SKIP CASE (from download or parse stage)
    # ------------------------------------------------------------
    if parsed.get("skipped"):
        path = parsed.get("path", "(no-path)")
        log_msg(book_id, f"[SAVE] SKIP chapter {chapter} → {path}")

        inc_skipped(book_id)
        return {"chapter": chapter, "path": path, "skipped": True}

    # ------------------------------------------------------------
    # NORMAL SAVE
    # ------------------------------------------------------------
    try:
        text = parsed.get("text", "")

        if chapter is None:
            raise ValueError("Missing chapter number in parsed payload")

        # Validate the text BEFORE opening the file: open(..., "w") creates
        # or truncates the target immediately, so a bad payload would
        # otherwise leave an empty file behind (or wipe an existing one)
        # before f.write() rejects it.
        if not isinstance(text, str):
            raise TypeError(
                f"Chapter text must be str, not {type(text).__name__}"
            )

        # Ensure folder exists
        os.makedirs(base_path, exist_ok=True)

        # Build file path
        path = get_save_path(chapter, base_path)

        # Write chapter text
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

        log_msg(book_id, f"[SAVE] Saved chapter {chapter} → {path}")

        inc_completed(book_id)

        return {"book_id": book_id, "chapter": chapter, "path": path}

    except Exception as exc:
        log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter}: {exc}")

        inc_failed(book_id)
        # Essential: record the failing chapter and the reason alongside
        # the failure counter.
        add_failed_chapter(book_id, chapter, str(exc))
        # Re-raise so the Celery task itself is marked as failed.
        raise