You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/download_tasks.py

34 lines
901 B

# scraper/tasks/download_tasks.py
from celery_app import celery_app
from logbus.publisher import log
import requests
# Import-time trace: writes a marker line to stdout whenever this module is loaded.
# NOTE(review): presumably a debugging aid for confirming that the Celery worker
# imported this task module — confirm whether it should stay.
print(">>> [IMPORT] download_tasks.py loaded")
@celery_app.task(bind=True, queue="download", ignore_result=False)
def download_chapter(self, chapter_num: int, chapter_url: str):
    """Fetch one chapter page over HTTP and return its decoded HTML.

    Registered as a bound Celery task on the ``download`` queue with its
    result kept (``ignore_result=False``). ``self`` is the task instance
    supplied by ``bind=True``; the body does not use it.

    Args:
        chapter_num: Chapter number; echoed in log lines and in the result.
        chapter_url: URL fetched with a single GET (20 second timeout).

    Returns:
        A dict with the keys ``"chapter"`` (``chapter_num``), ``"url"``
        (``chapter_url``) and ``"html"`` (the decoded response body).

    Raises:
        Exception: Any error raised while requesting or decoding the page,
            including the ``requests.HTTPError`` produced by
            ``raise_for_status()`` for 4xx/5xx responses, is logged and then
            re-raised unchanged.
    """
    log(f"[DL] Downloading chapter {chapter_num}: {chapter_url}")
    try:
        resp = requests.get(
            chapter_url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=20,
        )
        resp.raise_for_status()

        # Decode with the charset detected from the body rather than the one
        # taken from the HTTP headers; fall back to gb2312 when detection
        # yields nothing.
        resp.encoding = resp.apparent_encoding or "gb2312"
        html = resp.text

        # The message says "bytes", so report the raw payload size.
        # len(html) would count decoded characters, which under-reports
        # multi-byte text. The body is already buffered by the .text access
        # above, so reading .content costs no extra I/O.
        log(f"[DL] OK {chapter_num}: {len(resp.content)} bytes")

        return {
            "chapter": chapter_num,
            "url": chapter_url,
            "html": html,
        }
    except Exception as exc:
        # Task boundary: record the failure, then let it propagate so the
        # caller still sees the original exception.
        log(f"[DL] ERROR {chapter_url}: {exc}")
        raise