# =========================================================
# File: scraper/tasks/pipeline.py
# Purpose:
#   Build Celery chains for chapter processing.
#
# Chain:
#   download_chapter(book_id, chapter_num, url, base_path)
#     → parse_chapter(download_result, meta)
#     → save_chapter(parsed_result, base_path)
#     → update_progress(final_result, book_id)
#
# All subtasks must pass through result dicts untouched so the
# next stage receives the correct fields.
# =========================================================

from celery import chain

from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.progress_tasks import update_progress
from scraper.tasks.save_tasks import save_chapter


def build_chapter_pipeline(
    book_id: str,
    chapter_number: int,
    chapter_url: str,
    base_path: str,
    meta: dict,
):
    """
    Build a Celery chain for one chapter.

    download_chapter(book_id, chapter_number, chapter_url, base_path)
      → parse_chapter(download_result, meta)
      → save_chapter(parsed_result, base_path)
      → update_progress(result, book_id)
    """
    return chain(
        download_chapter.s(book_id, chapter_number, chapter_url, base_path),
        parse_chapter.s(meta),
        save_chapter.s(base_path),
        update_progress.s(book_id),
    )
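

# ---------------------------------------------------------
# Usage sketch (illustrative only, not part of the original
# module): dispatching a single chapter pipeline. The book id,
# URL, path, and meta values below are hypothetical placeholders.
# Note that each `.s()` signature above is partial: at runtime,
# Celery prepends the previous task's return value as the first
# positional argument, which is why every subtask must return
# its result dict intact.
# ---------------------------------------------------------
if __name__ == "__main__":
    pipeline = build_chapter_pipeline(
        book_id="book-001",                                  # hypothetical id
        chapter_number=1,
        chapter_url="https://example.com/books/book-001/1",  # placeholder URL
        base_path="/data/books/book-001",                    # placeholder path
        meta={"encoding": "utf-8"},                          # assumed meta fields
    )
    # apply_async() enqueues the whole chain on the configured broker;
    # the returned AsyncResult tracks the final task (update_progress).
    async_result = pipeline.apply_async()
    print(f"queued chapter pipeline: {async_result.id}")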