You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
46 lines
1.4 KiB
46 lines
1.4 KiB
# =========================================================
# File: scraper/tasks/pipeline.py
#
# Purpose:
#   Build Celery chains for chapter processing.
#
# Chain:
#   download_chapter(book_id, chapter_num, url, base_path)
#     → parse_chapter(download_result, meta)
#     → save_chapter(parsed_result, base_path)
#     → update_progress(final_result, book_id)
#
# All subtasks must pass through result dicts untouched so the
# next stage receives the correct fields.
# =========================================================

# Third-party.
from celery import chain

# Local task modules — each provides one stage of the chapter pipeline.
from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.progress_tasks import update_progress
from scraper.tasks.save_tasks import save_chapter


def build_chapter_pipeline(
    book_id: str,
    chapter_number: int,
    chapter_url: str,
    base_path: str,
    meta: dict,
):
    """
    Build a Celery chain for one chapter.

    Stages (the result dict of each task is fed to the next):

        download_chapter(book_id, chapter_number, chapter_url, base_path)
            → parse_chapter(download_result, meta)
            → save_chapter(parsed_result, base_path)
            → update_progress(result, book_id)

    Args:
        book_id: Identifier of the book this chapter belongs to.
        chapter_number: Ordinal position of the chapter within the book.
        chapter_url: URL that download_chapter fetches.
        base_path: Filesystem root passed to the download and save tasks.
        meta: Metadata dict forwarded to parse_chapter.

    Returns:
        An unexecuted celery chain; the caller launches it with
        .delay() / .apply_async().
    """
    return chain(
        # Only the first signature carries all of its arguments explicitly;
        # every later .s() receives the preceding task's result dict as its
        # first positional argument, with the args below appended after it.
        download_chapter.s(book_id, chapter_number, chapter_url, base_path),
        parse_chapter.s(meta),
        save_chapter.s(base_path),
        update_progress.s(book_id),
    )