You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.5 KiB
49 lines
1.5 KiB
# scraper/tasks/save_tasks.py
|
|
# Import-time debug trace: runs as a module-level statement when this file is loaded.
print(">>> [IMPORT] save_tasks.py loaded")
|
|
|
|
from celery import shared_task
|
|
from logbus.publisher import log
|
|
import os
|
|
from scraper.utils import get_save_path
|
|
|
|
|
|
@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str):
    """Write one parsed chapter to disk, or pass a skipped chapter through.

    Args:
        parsed: Payload produced earlier in the pipeline. Keys read here:
            ``skipped``, ``chapter``, ``path``, ``url`` and ``text``.
        base_path: Directory handed to ``get_save_path``; it is created
            first if it does not exist.

    Returns:
        ``{"chapter", "path", "skipped": True}`` when the payload is marked
        as skipped, otherwise ``{"chapter", "path"}`` for the file written.

    Raises:
        ValueError: If the payload carries no chapter number.
        TypeError: If ``text`` is not a string.
        OSError: If the directory or the file cannot be written.
    """
    print(f">>> [save_tasks] save_chapter() CALLED for chapter {parsed.get('chapter')}")

    # ----------------------------
    # SKIP: If pipeline marked skip
    # ----------------------------
    if parsed.get("skipped"):
        chapter = parsed.get("chapter")
        path = parsed.get("path")
        log(f"[SAVE] SKIP chapter {chapter} (already exists) → {path}")
        print(f">>> [save_tasks] SKIPPED {path}")
        return {"chapter": chapter, "path": path, "skipped": True}

    # Read the payload before entering the try block so the error handler
    # below can always reference `url`.
    chapter_number = parsed.get("chapter")
    url = parsed.get("url")
    text = parsed.get("text", "")

    try:
        # Only an absent/empty value counts as missing: chapter 0 is a
        # valid number and must not be rejected by a truthiness test.
        if chapter_number is None or chapter_number == "":
            raise ValueError("Missing chapter_number in parsed payload")

        # Validate before touching the filesystem: f.write() would raise the
        # same TypeError, but only after open(..., "w") had already created
        # or truncated the target, leaving an empty chapter file behind.
        if not isinstance(text, str):
            raise TypeError(
                f"Chapter text must be str, got {type(text).__name__}"
            )

        os.makedirs(base_path, exist_ok=True)

        # unified filename logic
        path = get_save_path(chapter_number, base_path)

        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

        log(f"[SAVE] Saved chapter {chapter_number} → {path}")
        print(f">>> [save_tasks] SAVED {path}")

        return {"chapter": chapter_number, "path": path}

    except Exception as exc:
        # Log for visibility, then re-raise so Celery records the failure.
        log(f"[SAVE] ERROR saving chapter from {url}: {exc}")
        print(f">>> [save_tasks] ERROR: {exc}")
        raise
|