You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
133 lines
3.9 KiB
133 lines
3.9 KiB
# ============================================================
|
|
# File: scraper/tasks/m4b_tasks.py
|
|
# ============================================================
|
|
|
|
import os
|
|
import subprocess
|
|
from typing import List
|
|
|
|
from celery_app import celery_app
|
|
from logbus.publisher import log
|
|
from scraper.logger_decorators import logcall
|
|
|
|
from db.repository import fetch_book, store_m4b_error
|
|
from scraper.scriptgen import build_merge_block
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Helper: detect volumes (UNCHANGED)
|
|
# ------------------------------------------------------------
|
|
def detect_volumes(book_base: str) -> List[str]:
    """Return the sorted names of the volume sub-directories of *book_base*.

    An entry counts as a volume when its name starts with ``volume_``
    (compared case-insensitively) and it is a directory. Only the bare
    entry names are returned, not full paths, in plain string sort order.
    """
    return sorted(
        entry
        for entry in os.listdir(book_base)
        if entry.lower().startswith("volume_")
        and os.path.isdir(os.path.join(book_base, entry))
    )
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Celery task
|
|
# ------------------------------------------------------------
|
|
@celery_app.task(bind=True, queue="m4b", ignore_result=True)
@logcall
def run_m4btool(self, book_idx: str):
    """Run the per-volume merge commands for one book and record failures.

    The book is looked up with ``fetch_book``; its directory is
    ``<BOOKSCRAPER_OUTPUT_DIR>/<title>`` (the environment variable defaults
    to ``"output"``). Volume sub-directories are detected, a command block
    is obtained from ``build_merge_block`` and split on ``&&``, and the
    resulting commands are paired with the volumes in order. Each command
    is run through the shell with the book directory as working directory.

    A volume without an ``Audio`` sub-directory is skipped. A failing
    command is logged and persisted with ``store_m4b_error``; the remaining
    volumes are still processed.

    Args:
        self: Task instance supplied because the task is declared with
            ``bind=True``; not used in the body.
        book_idx: Identifier handed to ``fetch_book``. Also used as the
            directory name when the book has no usable title.

    Returns:
        None. The function returns early (after logging) when the book,
        its directory, or its volumes cannot be found.
    """
    log(f"[M4B] START book_idx={book_idx}")

    book = fetch_book(book_idx)
    if not book:
        log(f"[M4B] Book not found in SQL: book_idx={book_idx}")
        return

    # Use `or` instead of a .get() default: a key that is present but None
    # would make os.path.join raise TypeError, and an empty title would make
    # book_base resolve to the output root itself.
    title = book.get("title") or book_idx
    author = book.get("author") or "Unknown"

    output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    book_base = os.path.join(output_root, title)

    log(f"[M4B] Book base directory: {book_base}")

    if not os.path.isdir(book_base):
        log(f"[M4B] Book directory missing: {book_base}")
        return

    volumes = detect_volumes(book_base)
    if not volumes:
        log(f"[M4B] No volumes found for book_idx={book_idx}")
        return

    log(f"[M4B] Volumes detected: {volumes}")

    # --------------------------------------------------------
    # Build canonical commands via scriptgen
    # --------------------------------------------------------
    # Volumes are numbered from 1 in their sorted order.
    merge_block = build_merge_block(
        title, author, list(enumerate(volumes, start=1))
    )
    commands = [c.strip() for c in merge_block.split("&&") if c.strip()]

    # The loop below pairs volumes and commands purely by position.
    # NOTE(review): this assumes build_merge_block emits exactly one
    # "&&"-separated command per volume, in the same order - confirm against
    # scraper.scriptgen. zip() would otherwise drop the surplus silently.
    if len(commands) != len(volumes):
        log(
            f"[M4B] WARNING: {len(commands)} commands for "
            f"{len(volumes)} volumes; only the first "
            f"{min(len(commands), len(volumes))} pairs will be processed"
        )

    for volume, cmd in zip(volumes, commands):
        audio_dir = os.path.join(book_base, volume, "Audio")
        if not os.path.isdir(audio_dir):
            log(f"[M4B] SKIP {volume}: no Audio directory")
            continue

        log(f"[M4B] Running for volume={volume}")
        log(f"[M4B] CMD: {cmd}")

        try:
            # NOTE(review): cmd is a shell string derived from book metadata
            # (title/author). Running it with shell=True is only safe if
            # build_merge_block quotes those values - confirm.
            result = subprocess.run(
                cmd,
                cwd=book_base,
                shell=True,
                capture_output=True,
                text=True,
                check=True,  # non-zero exit -> CalledProcessError
            )

            if result.stdout:
                log(f"[M4B][STDOUT] {result.stdout}")

        except subprocess.CalledProcessError as exc:
            # The command ran but exited non-zero: log its output and
            # persist stderr (or the exception text when stderr is empty).
            log(f"[M4B][FAILED] volume={volume}")

            if exc.stdout:
                log(f"[M4B][STDOUT] {exc.stdout}")
            if exc.stderr:
                log(f"[M4B][STDERR] {exc.stderr}")

            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=exc.stderr or str(exc),
            )

        except Exception as exc:
            # Deliberate best-effort boundary: any other failure for this
            # volume is recorded and the next volume is still attempted.
            log(f"[M4B][UNEXPECTED ERROR] volume={volume}: {exc}")

            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=str(exc),
            )

    log(f"[M4B] FINISHED book_idx={book_idx}")
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Orchestration helper (UNCHANGED)
|
|
# ------------------------------------------------------------
|
|
@logcall
def queue_m4b_for_book(book_idx: str):
    """Enqueue the ``run_m4btool`` task for *book_idx* on the ``m4b`` queue.

    The task is addressed by its dotted name through
    ``celery_app.send_task``. Nothing is returned.
    """
    log(f"[M4B] Queuing m4b-tool for book_idx={book_idx}")

    task_path = "scraper.tasks.m4b_tasks.run_m4btool"
    celery_app.send_task(task_path, args=[book_idx], queue="m4b")
|