You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
901 B
34 lines
901 B
# scraper/tasks/download_tasks.py
|
|
from celery_app import celery_app
|
|
from logbus.publisher import log
|
|
import requests
|
|
|
|
# Module-level statement: runs when this module is imported and writes a
# marker line to stdout (presumably a debugging aid for confirming that the
# worker picked up this task module -- confirm before removing).
print(">>> [IMPORT] download_tasks.py loaded")
|
|
|
|
|
|
@celery_app.task(bind=True, queue="download", ignore_result=False)
def download_chapter(self, chapter_num: int, chapter_url: str):
    """Fetch one chapter page over HTTP and return its decoded HTML.

    Args:
        self: The bound task instance (the task is declared with
            ``bind=True``); not used by this body.
        chapter_num: Chapter number, used in log lines and echoed back in
            the result.
        chapter_url: URL of the chapter page to download.

    Returns:
        A dict with the keys ``"chapter"`` (``chapter_num``), ``"url"``
        (``chapter_url``) and ``"html"`` (the decoded response text).

    Raises:
        Exception: Any error raised while requesting or decoding the page,
            including the HTTP error from ``raise_for_status()``, is logged
            and then re-raised unchanged.
    """
    log(f"[DL] Downloading chapter {chapter_num}: {chapter_url}")

    try:
        resp = requests.get(
            chapter_url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=20,
        )
        resp.raise_for_status()

        # Decode using the encoding detected from the body itself, falling
        # back to gb2312 when detection yields nothing.
        resp.encoding = resp.apparent_encoding or "gb2312"
        html = resp.text

        # Log the size of the raw payload: len(html) would count decoded
        # characters, which is not a byte count for multi-byte encodings.
        log(f"[DL] OK {chapter_num}: {len(resp.content)} bytes")

        return {
            "chapter": chapter_num,
            "url": chapter_url,
            "html": html,
        }

    except Exception as exc:
        # Task boundary: record the failure, then let it propagate.
        log(f"[DL] ERROR {chapter_url}: {exc}")
        raise
|