You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/pipeline.py

44 lines
1.3 KiB

# =========================================================
# File: scraper/tasks/pipeline.py
# Purpose:
# Build Celery chains for chapter processing using chapter_dict.
#
# New Chain:
# download_chapter(book_id, chapter_dict, book_meta)
# → parse_chapter(download_result)
# → save_chapter(parsed_result)
# → update_progress(final_result, book_id)
#
# All subtasks pass through result dicts unchanged so the
# next stage receives the correct fields.
# =========================================================
from celery import chain
from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.save_tasks import save_chapter
from scraper.tasks.progress_tasks import update_progress
def build_chapter_pipeline(
    book_id: str,
    chapter_dict: dict,
    book_meta: dict,
):
    """
    Assemble the Celery chain that processes a single chapter.

    Stages, in execution order:
        1. download_chapter(book_id, chapter_dict, book_meta)
        2. parse_chapter(<result of download_chapter>)
        3. save_chapter(<result of parse_chapter>)
        4. update_progress(<result of save_chapter>, book_id)

    The chain is only constructed here; it is returned to the caller
    without being dispatched.
    """
    # Only the first stage receives all of its arguments up front. The
    # remaining signatures are partial: the chain supplies the previous
    # stage's result as their leading argument.
    stages = (
        download_chapter.s(book_id, chapter_dict, book_meta),
        parse_chapter.s(),
        save_chapter.s(),
        # book_id is bound here and follows the save_chapter result.
        update_progress.s(book_id),
    )
    return chain(*stages)