You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
42 lines
1.0 KiB
42 lines
1.0 KiB
# =========================================================
|
|
# File: scraper/tasks/pipeline.py
|
|
# Purpose:
|
|
# Build Celery chains for chapter processing using payload dict.
|
|
#
|
|
# Pipeline v3:
|
|
# download_chapter(payload)
|
|
# → parse_chapter(payload)
|
|
# → save_chapter(payload)
|
|
# =========================================================
|
|
|
|
from celery import chain
|
|
|
|
from scraper.tasks.download_tasks import download_chapter
|
|
from scraper.tasks.parse_tasks import parse_chapter
|
|
from scraper.tasks.save_tasks import save_chapter
|
|
|
|
from scraper.logger_decorators import logcall
|
|
|
|
|
|
@logcall
def build_chapter_pipeline(book_id: str, chapter_dict: dict, book_meta: dict):
    """
    Assemble the Celery chain that processes a single chapter.

    A single payload dict is created here and bound to the first task's
    signature. The two following signatures are created without arguments
    of their own, so under Celery's chain semantics each one is called with
    the return value of the task before it.

    Args:
        book_id: Stored in the payload under "book_id".
        chapter_dict: Stored in the payload under "chapter".
        book_meta: Stored in the payload under "book_meta".

    Returns:
        The chain download_chapter -> parse_chapter -> save_chapter.
        The chain is only built here; dispatching it is left to the caller.
    """
    # Values supplied by the caller.
    inputs = {
        "book_id": book_id,
        "chapter": chapter_dict,
        "book_meta": book_meta,
    }

    # Slots that start out empty.
    # NOTE(review): presumably filled in by the download/parse/save tasks —
    # confirm against the task modules.
    placeholders = {
        "html": None,
        "parsed": None,
        "skipped": False,
        "path": None,
    }

    # Merge preserves key order: inputs first, then placeholders.
    payload = {**inputs, **placeholders}

    steps = (
        download_chapter.s(payload),
        parse_chapter.s(),
        save_chapter.s(),
    )
    return chain(*steps)
|