# scraper/tasks/save_tasks.py
print(">>> [IMPORT] save_tasks.py loaded")

import os

from celery import shared_task

from logbus.publisher import log
from scraper.utils import get_save_path


@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str):
    """Write a parsed chapter's text to disk under ``base_path``.

    Expects ``parsed`` to carry ``chapter``, ``url``, and ``text`` keys; a
    ``skipped`` flag from an earlier pipeline stage short-circuits the write.
    Returns a small result dict so downstream tasks (and the result backend)
    can see where the file landed.
    """
    print(f">>> [save_tasks] save_chapter() CALLED for chapter {parsed.get('chapter')}")

    # ----------------------------
    # SKIP: If pipeline marked skip
    # ----------------------------
    if parsed.get("skipped"):
        chapter = parsed.get("chapter")
        path = parsed.get("path")
        log(f"[SAVE] SKIP chapter {chapter} (already exists) → {path}")
        print(f">>> [save_tasks] SKIPPED {path}")
        return {"chapter": chapter, "path": path, "skipped": True}

    try:
        chapter_number = parsed.get("chapter")
        text = parsed.get("text", "")

        # Explicit None check so a legitimate chapter number of 0 is not
        # rejected by truthiness.
        if chapter_number is None:
            raise ValueError("Missing 'chapter' in parsed payload")

        os.makedirs(base_path, exist_ok=True)

        # Unified filename logic shared with the rest of the pipeline.
        path = get_save_path(chapter_number, base_path)

        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

        log(f"[SAVE] Saved chapter {chapter_number} → {path}")
        print(f">>> [save_tasks] SAVED {path}")
        return {"chapter": chapter_number, "path": path}

    except Exception as exc:
        # Read the URL straight from `parsed` so this log line cannot itself
        # raise NameError if the failure happened before locals were bound.
        log(f"[SAVE] ERROR saving chapter from {parsed.get('url')}: {exc}")
        print(f">>> [save_tasks] ERROR: {exc}")
        raise  # re-raise so Celery records the failure and can retry
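
# Usage sketch. This task is typically the final link of a Celery chain, with
# an upstream parse task supplying the `parsed` dict as the first positional
# argument; `save_chapter.s(base_path)` then receives that dict prepended by
# Celery's chaining protocol. The `parse_chapter` name below is an assumption
# for illustration only, not a task defined in this module:
#
#     from celery import chain
#
#     chain(
#         parse_chapter.s("https://example.com/ch/12"),  # hypothetical upstream task
#         save_chapter.s("/data/book"),                  # base_path supplied here
#     ).apply_async()
#
# Standalone invocation works the same way (routing to the "save" queue is
# handled by the decorator above):
#
#     save_chapter.delay(
#         {"chapter": 12, "url": "https://example.com/ch/12", "text": "..."},
#         "/data/book",
#     )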