# =========================================================
# File: scraper/tasks/pipeline.py
# Purpose:
#   Build Celery chains for chapter processing.
#
#   download → parse → save → update_progress
#
# =========================================================
|
|
|
|
from celery import chain
|
|
|
|
from scraper.tasks.download_tasks import download_chapter
|
|
from scraper.tasks.parse_tasks import parse_chapter
|
|
from scraper.tasks.save_tasks import save_chapter
|
|
from scraper.tasks.progress_tasks import update_progress # NEW
|
|
|
|
|
|
def build_chapter_pipeline(
    book_id: str,
    chapter_number: int,
    chapter_url: str,
    base_path: str,
    meta: dict,
):
    """
    Assemble the Celery chain that processes one chapter end-to-end.

    Each stage is a partial signature; Celery prepends the previous
    task's result as the first argument of the next task:

        download_chapter(book_id, chapter_number, chapter_url, base_path)
        → parse_chapter(<download result>, meta)
        → save_chapter(<parsed result>, base_path)
        → update_progress(<save result>, book_id)   # central progress update

    Returns the (not yet applied) chain; callers invoke it via
    ``.apply_async()`` / ``.delay()``.
    """
    stages = [
        download_chapter.s(book_id, chapter_number, chapter_url, base_path),
        parse_chapter.s(meta),
        save_chapter.s(base_path),
        update_progress.s(book_id),
    ]
    return chain(*stages)
|