# =========================================================
# File: scraper/tasks/pipeline.py
# Purpose:
#   Build Celery chains for chapter processing.
#
#   download → parse → save → update_progress
#
# =========================================================
|
|
|
|
from celery import chain
|
|
|
|
from scraper.tasks.download_tasks import download_chapter
|
|
from scraper.tasks.parse_tasks import parse_chapter
|
|
from scraper.tasks.save_tasks import save_chapter
|
|
from scraper.tasks.progress_tasks import update_progress # NEW
|
|
|
|
|
|
def build_chapter_pipeline(
    book_id: str,
    chapter_number: int,
    chapter_url: str,
    base_path: str,
    meta: dict,
):
    """
    Assemble the Celery chain that processes one chapter end-to-end.

    Each stage is a partial signature; Celery prepends the previous
    task's result as the first argument of the next task:

        download_chapter(book_id, chapter_number, chapter_url, base_path)
        → parse_chapter(<download result>, meta)
        → save_chapter(<parsed result>, base_path)
        → update_progress(<save result>, book_id)   # central progress update

    Returns the (not yet applied) chain; callers invoke it via
    ``.apply_async()`` / ``.delay()``.
    """
    stages = [
        download_chapter.s(book_id, chapter_number, chapter_url, base_path),
        parse_chapter.s(meta),
        save_chapter.s(base_path),
        update_progress.s(book_id),
    ]
    return chain(*stages)
|