You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
44 lines
1.3 KiB
# =========================================================
# File: scraper/tasks/pipeline.py
# Purpose:
# Build Celery chains for chapter processing using chapter_dict.
#
# New Chain:
# download_chapter(book_id, chapter_dict, book_meta)
#   → parse_chapter(download_result)
#   → save_chapter(parsed_result)
#   → update_progress(final_result, book_id)
#
# All subtasks pass through result dicts unchanged so the
# next stage receives the correct fields.
# =========================================================

|
from celery import chain

from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.save_tasks import save_chapter
from scraper.tasks.progress_tasks import update_progress

|
def build_chapter_pipeline(
    book_id: str,
    chapter_dict: dict,
    book_meta: dict,
):
    """
    Compose the Celery processing chain for a single chapter.

    Stage order (each stage's result dict feeds the next):

        download_chapter(book_id, chapter_dict, book_meta)
        → parse_chapter(download_result)
        → save_chapter(parsed_result)
        → update_progress(result, book_id)
    """
    # Celery's `|` operator links signatures into the same chain that
    # chain(...) would build; the previous task's result is prepended
    # to each partial signature's arguments at execution time.
    pipeline = (
        download_chapter.s(book_id, chapter_dict, book_meta)
        | parse_chapter.s()
        | save_chapter.s()
        | update_progress.s(book_id)
    )
    return pipeline