You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
37 lines
1.0 KiB
37 lines
1.0 KiB
# scraper/tasks/pipeline.py
|
|
|
|
"""
|
|
Build the pipeline for a single chapter:
|
|
download → parse → save
|
|
|
|
This module must NOT import scraping.py or controllers,
|
|
otherwise Celery will hit circular imports on worker startup.
|
|
Only import task functions here.
|
|
"""
|
|
|
|
from celery import chain
|
|
|
|
from scraper.tasks.download_tasks import download_chapter
|
|
from scraper.tasks.parse_tasks import parse_chapter
|
|
from scraper.tasks.save_tasks import save_chapter
|
|
|
|
|
|
def build_chapter_pipeline(
    chapter_number: int, chapter_url: str, base_path: str, meta: dict
):
    """
    Construct a Celery chain that processes one chapter end-to-end.

    The chain runs three tasks in order:
        1. download_chapter — fetches the chapter content
        2. parse_chapter    — parses the downloaded payload
        3. save_chapter     — persists the parsed result

    Args:
        chapter_number: Ordinal of the chapter being processed.
        chapter_url: URL the chapter is downloaded from.
        base_path: Filesystem root used by both download and save steps.
        meta: Extra metadata forwarded to the parse step.

    Returns:
        An unexecuted Celery chain; call it (or ``.apply_async()``) to run.
    """
    # download_chapter receives all three of its arguments up front.
    download_sig = download_chapter.s(chapter_number, chapter_url, base_path)

    # Celery passes each task's return value as the first positional
    # argument of the next task; the partial signatures below only
    # supply the *extra* arguments.
    parse_sig = parse_chapter.s(meta)       # gets download output + meta
    save_sig = save_chapter.s(base_path)    # gets parse output + base_path

    return chain(download_sig, parse_sig, save_sig)
|