You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 lines
2.3 KiB
82 lines
2.3 KiB
# =========================================================
|
|
# File: scraper/tasks/save_tasks.py
|
|
# Purpose: Save parsed chapter text to disk.
|
|
# =========================================================
|
|
|
|
# Import-time trace: executed when this module is loaded, so the message
# on stdout shows that save_tasks.py was actually imported.
print(">>> [IMPORT] save_tasks.py loaded")
|
|
|
|
from celery import shared_task
|
|
import os
|
|
|
|
from scraper.utils import get_save_path
|
|
from scraper.tasks.download_tasks import log_msg # unified logger
|
|
from scraper.progress import (
|
|
inc_completed,
|
|
inc_skipped,
|
|
inc_failed,
|
|
add_failed_chapter, # <-- the only addition that was required
|
|
)
|
|
|
|
|
|
def _write_text_atomic(path, text: str) -> None:
    """Write ``text`` to ``path`` without ever exposing a partial file.

    The text is first written to a sibling temp file and then moved over
    ``path`` with ``os.replace``. If anything fails before the swap, the
    temp file is removed and whatever previously existed at ``path`` is left
    untouched (a plain ``open(path, "w")`` would have truncated it already).
    """
    # Include the PID so two worker processes saving the same chapter do not
    # write into the same temp file.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as handle:
            handle.write(text)
        os.replace(tmp_path, path)
    except BaseException:
        try:
            os.remove(tmp_path)
        except OSError:
            # Temp file was never created or is already gone; nothing to clean.
            pass
        raise


@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str) -> dict:
    """Save parsed chapter text to disk.

    Args:
        parsed: Payload produced by the earlier pipeline stages. Keys read
            here:

            - ``"book_id"`` (str): defaults to ``"NOBOOK"`` when absent.
            - ``"chapter"`` (int): chapter number; required unless skipped.
            - ``"text"`` (str): chapter body; defaults to ``""`` when absent.
            - ``"skipped"`` (bool): truthy when an earlier stage skipped
              this chapter.
            - ``"path"`` (optional str): only read in the skip case.

            The payload may carry other keys (e.g. ``"url"``); they are not
            used by this task.
        base_path: Directory the chapter file is written into. It is created
            if it does not exist.

    Returns:
        Skip case: ``{"book_id", "chapter", "path", "skipped": True}``.
        Normal case: ``{"book_id", "chapter", "path"}`` where ``path`` is the
        value returned by ``get_save_path(chapter, base_path)``.

    Raises:
        ValueError: If ``"chapter"`` is missing from a non-skipped payload.
        Exception: Any error raised while saving is logged, recorded via
            ``inc_failed`` / ``add_failed_chapter`` and then re-raised.
    """
    book_id = parsed.get("book_id", "NOBOOK")
    chapter = parsed.get("chapter")

    # ------------------------------------------------------------
    # SKIP CASE (from download or parse stage)
    # ------------------------------------------------------------
    if parsed.get("skipped"):
        path = parsed.get("path", "(no-path)")
        log_msg(book_id, f"[SAVE] SKIP chapter {chapter} → {path}")

        inc_skipped(book_id)
        # "book_id" is included so both result shapes identify the book.
        return {
            "book_id": book_id,
            "chapter": chapter,
            "path": path,
            "skipped": True,
        }

    # ------------------------------------------------------------
    # NORMAL SAVE
    # ------------------------------------------------------------
    try:
        if chapter is None:
            raise ValueError("Missing chapter number in parsed payload")

        text = parsed.get("text", "")

        # Ensure folder exists
        os.makedirs(base_path, exist_ok=True)

        # Build file path
        path = get_save_path(chapter, base_path)

        # Write chapter text; a failed write must not leave a truncated file.
        _write_text_atomic(path, text)

        log_msg(book_id, f"[SAVE] Saved chapter {chapter} → {path}")

        inc_completed(book_id)

        return {"book_id": book_id, "chapter": chapter, "path": path}

    except Exception as exc:
        log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter}: {exc}")

        inc_failed(book_id)
        # Essential: record which chapter failed and why, not just the count.
        add_failed_chapter(book_id, chapter, str(exc))

        # Re-raise so the task itself is reported as failed.
        raise
|