You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/save_tasks.py

82 lines
2.3 KiB

# =========================================================
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk.
# =========================================================
# Import-time trace: this statement runs when the module is imported, so the
# message on stdout shows that save_tasks.py was loaded.
print(">>> [IMPORT] save_tasks.py loaded")
from celery import shared_task
import os
from scraper.utils import get_save_path
from scraper.tasks.download_tasks import log_msg # unified logger
from scraper.progress import (
inc_completed,
inc_skipped,
inc_failed,
add_failed_chapter, # <-- the only necessary addition
)
@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str):
    """
    Save parsed chapter text to disk.

    Args:
        parsed: Payload handed over by the previous pipeline stage::

            parsed = {
                "book_id": str,
                "chapter": int,
                "text": str,
                "url": str,          # accepted but not used by this task
                "skipped": bool,
                "path": optional str
            }

        base_path: Directory the chapter file is written into; it is
            created when it does not exist yet.

    Returns:
        dict: ``{"book_id", "chapter", "path"}`` after a successful write,
        or ``{"book_id", "chapter", "path", "skipped": True}`` when the
        payload was already flagged as skipped (nothing is written then).

    Raises:
        ValueError: If the payload carries no chapter number.
        Exception: Any error raised while creating the folder, building the
            path or writing the file is reported through ``inc_failed`` and
            ``add_failed_chapter`` and then re-raised unchanged.
    """
    book_id = parsed.get("book_id", "NOBOOK")
    chapter = parsed.get("chapter")

    # ------------------------------------------------------------
    # SKIP CASE (from download or parse stage)
    # ------------------------------------------------------------
    if parsed.get("skipped"):
        path = parsed.get("path", "(no-path)")
        # Separator keeps the chapter number and the path from running together.
        log_msg(book_id, f"[SAVE] SKIP chapter {chapter} -> {path}")
        inc_skipped(book_id)
        # Carries book_id as well, so both result shapes identify the book.
        return {
            "book_id": book_id,
            "chapter": chapter,
            "path": path,
            "skipped": True,
        }

    # ------------------------------------------------------------
    # NORMAL SAVE
    # ------------------------------------------------------------
    try:
        # Raised inside the try block on purpose: a missing chapter number
        # is reported through the same failure bookkeeping as an I/O error.
        if chapter is None:
            raise ValueError("Missing chapter number in parsed payload")

        text = parsed.get("text", "")

        # Ensure folder exists
        os.makedirs(base_path, exist_ok=True)

        # Build file path
        path = get_save_path(chapter, base_path)

        # Write chapter text
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

        log_msg(book_id, f"[SAVE] Saved chapter {chapter} -> {path}")
        inc_completed(book_id)
        return {"book_id": book_id, "chapter": chapter, "path": path}
    except Exception as exc:
        log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter}: {exc}")
        inc_failed(book_id)
        # Essential: register which chapter failed and why, not just the count.
        add_failed_chapter(book_id, chapter, str(exc))
        # Re-raise so the task itself still ends in a failure state.
        raise