You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/download_tasks.py

34 lines
919 B

# scraper/tasks/download_tasks.py
from celery import shared_task
from logbus.publisher import log
import requests
@shared_task(bind=True, queue="download", ignore_result=False)
def download_chapter(self, chapter_number: int, chapter_url: str) -> dict:
    """
    Download a chapter page and return its raw HTML for parsing.

    Nothing is persisted here; per the original author's note, saving is
    handled by save_tasks.py.

    Args:
        self: Bound Celery task instance (supplied because of ``bind=True``);
            not used by this function.
        chapter_number: Chapter identifier, echoed in the logs and the result.
        chapter_url: URL fetched with an HTTP GET (15 second timeout).

    Returns:
        A dict with the keys ``"chapter"`` (the given chapter number),
        ``"url"`` (the given URL) and ``"html"`` (the decoded response body).

    Raises:
        Exception: Any error raised while fetching, including the error
            raised by ``raise_for_status()`` for an unsuccessful response,
            is logged and then re-raised unchanged.
    """
    log(f"[DL] Downloading chapter {chapter_number}: {chapter_url}")
    try:
        resp = requests.get(chapter_url, timeout=15)
        resp.raise_for_status()
        html = resp.text
        # Report the size of the raw body: len(html) would count decoded
        # characters, which differs from the byte count for non-ASCII pages.
        log(f"[DL] OK {chapter_number}: {len(resp.content)} bytes")
        # Per the original note, this result is passed on to the parse task.
        return {
            "chapter": chapter_number,
            "url": chapter_url,
            "html": html,
        }
    except Exception as exc:
        # Task boundary: record the failure, then let the error propagate.
        log(f"[DL] ERROR downloading {chapter_url}: {exc}")
        raise