You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
919 B
34 lines
919 B
# scraper/tasks/download_tasks.py
|
|
|
|
from celery import shared_task
|
|
from logbus.publisher import log
|
|
import requests
|
|
|
|
|
|
@shared_task(bind=True, queue="download", ignore_result=False)
def download_chapter(self, chapter_number: int, chapter_url: str) -> dict:
    """
    Download a chapter page and return its raw HTML for parsing.

    Does NOT save anything; that is done by save_tasks.py

    Args:
        self: Bound task instance (the task is declared with ``bind=True``);
            not used by this function.
        chapter_number: Chapter number, used in the log lines and echoed
            back in the result.
        chapter_url: URL fetched with an HTTP GET (``timeout=15``).

    Returns:
        A dict with the keys ``"chapter"`` (the given chapter number),
        ``"url"`` (the given URL) and ``"html"`` (the decoded response text).

    Raises:
        Exception: Any error raised while fetching, including the error
            raised by ``raise_for_status()`` for an unsuccessful response,
            is logged and then re-raised unchanged.
    """
    log(f"[DL] Downloading chapter {chapter_number}: {chapter_url}")

    try:
        resp = requests.get(chapter_url, timeout=15)
        resp.raise_for_status()
        html = resp.text

        # Measure the raw body rather than the decoded text: len(html) counts
        # characters, which is smaller than the byte size for any page that
        # contains multi-byte characters.
        log(f"[DL] OK {chapter_number}: {len(resp.content)} bytes")

        # This result is passed on to parse_task
        return {
            "chapter": chapter_number,
            "url": chapter_url,
            "html": html,
        }

    except Exception as exc:
        # Log for visibility, then re-raise so the failure still propagates
        # to the caller instead of being swallowed here.
        log(f"[DL] ERROR downloading {chapter_url}: {exc}")
        raise
|