You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/pipeline.py

37 lines
1.0 KiB

# scraper/tasks/pipeline.py
"""
Build the pipeline for a single chapter:
download → parse → save
This module must NOT import scraping.py or controllers,
otherwise Celery will hit circular imports on worker startup.
Only import task functions here.
"""
from celery import chain
from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.save_tasks import save_chapter
def build_chapter_pipeline(
    chapter_number: int, chapter_url: str, base_path: str, meta: dict
):
    """
    Assemble the Celery chain that processes a single chapter.

    The stages run in this order:
        download_chapter -> parse_chapter -> save_chapter

    In a Celery chain, each task's return value is passed as the first
    positional argument of the next signature, ahead of the arguments
    bound here.

    Args:
        chapter_number: First argument bound to download_chapter.
        chapter_url: Second argument bound to download_chapter.
        base_path: Third argument bound to download_chapter; also bound
            to save_chapter as its extra argument.
        meta: Extra argument bound to parse_chapter.

    Returns:
        The chain object. It is only constructed here, not dispatched;
        the caller decides when to run it.
    """
    # Stage 1: the only stage with no upstream result, so it receives
    # all three of its arguments explicitly.
    download_step = download_chapter.s(chapter_number, chapter_url, base_path)

    # Stage 2: called with (download result, meta).
    parse_step = parse_chapter.s(meta)

    # Stage 3: called with (parse result, base_path).
    save_step = save_chapter.s(base_path)

    return chain(download_step, parse_step, save_step)