You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/scraper/tasks/save_tasks.py

124 lines
3.7 KiB

# ============================================================
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk + trigger audio.
# ============================================================
# Import-time trace: this top-level statement runs when the module is
# imported, before the imports below are executed.
print(">>> [IMPORT] save_tasks.py loaded")
from celery import shared_task
import os
from scraper.utils import get_save_path
from scraper.tasks.download_tasks import log_msg # unified logger
from scraper.progress import (
inc_completed,
inc_skipped,
inc_failed,
add_failed_chapter,
)
from scraper.tasks.audio_tasks import generate_audio
@shared_task(bind=True, queue="save", ignore_result=False)
def save_chapter(self, parsed: dict, base_path: str):
"""
Save parsed chapter text to disk.
parsed = {
"book_id": str,
"chapter": int,
"text": str,
"url": str,
"skipped": bool,
"path": optional str
}
"""
book_id = parsed.get("book_id", "NOBOOK")
chapter = parsed.get("chapter")
# ------------------------------------------------------------
# SKIP CASE (download or parse skipped the chapter)
# ------------------------------------------------------------
if parsed.get("skipped"):
path = parsed.get("path", "(no-path)")
log_msg(book_id, f"[SAVE] SKIP chapter {chapter}{path}")
inc_skipped(book_id)
# Determine volume name from the base path
volume_name = os.path.basename(base_path.rstrip("/"))
# Queue audio using the existing saved file
try:
generate_audio.delay(
book_id,
volume_name,
chapter,
f"Chapter {chapter}",
path, # <<-- correct: this is always the real file path
)
log_msg(
book_id,
f"[AUDIO] Task queued (SKIPPED) for chapter {chapter} in {volume_name}",
)
except Exception as audio_exc:
log_msg(
book_id,
f"[AUDIO] ERROR queueing (SKIPPED) chapter {chapter}: {audio_exc}",
)
return {
"book_id": book_id, # <<< FIXED
"chapter": chapter,
"path": path,
"skipped": True,
}
# ------------------------------------------------------------
# NORMAL SAVE CASE
# ------------------------------------------------------------
try:
text = parsed.get("text", "")
if chapter is None:
raise ValueError("Missing chapter number in parsed payload")
# Ensure chapter folder exists
os.makedirs(base_path, exist_ok=True)
# Build chapter file path
path = get_save_path(chapter, base_path)
# Save chapter text to disk
with open(path, "w", encoding="utf-8") as f:
f.write(text)
log_msg(book_id, f"[SAVE] Saved chapter {chapter}{path}")
inc_completed(book_id)
# Determine volume name
volume_name = os.path.basename(base_path.rstrip("/"))
# Queue audio task (always use the saved file path)
try:
generate_audio.delay(
book_id,
volume_name,
chapter,
f"Chapter {chapter}",
path,
)
log_msg(
book_id, f"[AUDIO] Task queued for chapter {chapter} in {volume_name}"
)
except Exception as audio_exc:
log_msg(book_id, f"[AUDIO] ERROR queueing chapter {chapter}: {audio_exc}")
return {"book_id": book_id, "chapter": chapter, "path": path}
except Exception as exc:
log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter}: {exc}")