You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
46 lines
1.4 KiB
46 lines
1.4 KiB
# =========================================================
# File: scraper/tasks/pipeline.py
#
# Purpose:
#   Build Celery chains for chapter processing.
#
# Chain:
#   download_chapter(book_id, chapter_num, url, base_path)
#     → parse_chapter(download_result, meta)
#     → save_chapter(parsed_result, base_path)
#     → update_progress(final_result, book_id)
#
# All subtasks must pass through result dicts untouched so the
# next stage receives the correct fields.
# =========================================================

# Third-party.
from celery import chain

# Local task modules — each provides one stage of the chapter pipeline.
from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.progress_tasks import update_progress
from scraper.tasks.save_tasks import save_chapter


def build_chapter_pipeline(
    book_id: str,
    chapter_number: int,
    chapter_url: str,
    base_path: str,
    meta: dict,
):
    """
    Build a Celery chain for one chapter.

    Stages (the result dict of each task is fed to the next):

        download_chapter(book_id, chapter_number, chapter_url, base_path)
            → parse_chapter(download_result, meta)
            → save_chapter(parsed_result, base_path)
            → update_progress(result, book_id)

    Args:
        book_id: Identifier of the book this chapter belongs to.
        chapter_number: Ordinal position of the chapter within the book.
        chapter_url: URL that download_chapter fetches.
        base_path: Filesystem root passed to the download and save tasks.
        meta: Metadata dict forwarded to parse_chapter.

    Returns:
        An unexecuted celery chain; the caller launches it with
        .delay() / .apply_async().
    """
    return chain(
        # Only the first signature carries all of its arguments explicitly;
        # every later .s() receives the preceding task's result dict as its
        # first positional argument, with the args below appended after it.
        download_chapter.s(book_id, chapter_number, chapter_url, base_path),
        parse_chapter.s(meta),
        save_chapter.s(base_path),
        update_progress.s(book_id),
    )