You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/pipeline.py

42 lines
1.0 KiB

# =========================================================
# File: scraper/tasks/pipeline.py
# Purpose:
# Build Celery chains for chapter processing using payload dict.
#
# Pipeline v3 (tasks run in this order):
#   download_chapter(payload)
#     → parse_chapter(payload)
#     → save_chapter(payload)
# =========================================================
from celery import chain
from scraper.tasks.download_tasks import download_chapter
from scraper.tasks.parse_tasks import parse_chapter
from scraper.tasks.save_tasks import save_chapter
from scraper.logger_decorators import logcall
@logcall
def build_chapter_pipeline(book_id: str, chapter_dict: dict, book_meta: dict):
    """
    Assemble the Celery chain that processes a single chapter.

    A fresh payload dict is created from the arguments and bound to the
    ``download_chapter`` signature. ``parse_chapter`` and ``save_chapter``
    are added as argument-less signatures, so inside the chain each one is
    fed the result of the task before it.

    The chain is only constructed and returned here; it is not dispatched.

    Args:
        book_id: Identifier stored under the ``"book_id"`` payload key.
        chapter_dict: Chapter data stored under the ``"chapter"`` key.
        book_meta: Book metadata stored under the ``"book_meta"`` key.

    Returns:
        The ``chain`` of download → parse → save signatures.
    """
    # Fields taken straight from the caller's arguments (stored by
    # reference, not copied).
    initial_payload = {
        "book_id": book_id,
        "chapter": chapter_dict,
        "book_meta": book_meta,
    }
    # Placeholder fields start empty / False.
    # NOTE(review): presumably filled in by the downstream tasks — confirm
    # against the task modules.
    initial_payload.update(html=None, parsed=None, skipped=False, path=None)

    # Only the first signature carries the payload explicitly.
    steps = (
        download_chapter.s(initial_payload),
        parse_chapter.s(),
        save_chapter.s(),
    )
    return chain(*steps)