You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
150 lines
4.3 KiB
150 lines
4.3 KiB
# ============================================================
|
|
# File: scraper/tasks/statuscheck.py
|
|
# Purpose:
|
|
# Final status check after audio completion.
|
|
#
|
|
# Responsibilities:
|
|
# - Verify Redis counters (sanity check)
|
|
# - Verify filesystem (Audio files present)
|
|
# - Queue m4btool task
|
|
#
|
|
# Design rules:
|
|
# - Book-scope ONLY
|
|
# - No direct Redis usage
|
|
# - Repository is the single source of truth
|
|
# - Idempotent, defensive, non-blocking
|
|
# ============================================================
|
|
|
|
import os
|
|
from celery_app import celery_app
|
|
from logbus.publisher import log
|
|
|
|
from scraper.logger_decorators import logcall
|
|
|
|
from db.repository import (
|
|
get_audio_done,
|
|
get_chapters_total,
|
|
set_status,
|
|
fetch_book,
|
|
)
|
|
|
|
from scraper.tasks.m4b_tasks import run_m4btool
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Helpers
|
|
# ------------------------------------------------------------
|
|
# NOTE: this helper must be wrapped with @logcall (like the other functions in
# this module). `log` is called as a message publisher elsewhere in this file,
# not as a decorator, so `@log` would rebind this name to log()'s return value.
@logcall
def _detect_volumes(book_base: str):
    """
    Return a sorted list of Volume_XXX directory names inside *book_base*.

    An entry qualifies when its name starts with "volume_" (compared
    case-insensitively) and it is a directory. Only the bare entry names
    are returned, not full paths.

    *book_base* must be a listable directory; any OSError raised by
    os.listdir propagates to the caller.
    """
    return sorted(
        entry
        for entry in os.listdir(book_base)
        if entry.lower().startswith("volume_")
        and os.path.isdir(os.path.join(book_base, entry))
    )
|
|
|
|
|
|
@logcall
def _count_audio_files(audio_dir: str) -> int:
    """
    Return the number of entries in *audio_dir* whose name ends in ".m4b".

    The suffix comparison is case-insensitive. When *audio_dir* is not an
    existing directory the result is 0.
    """
    if os.path.isdir(audio_dir):
        return sum(
            1 for entry in os.listdir(audio_dir) if entry.lower().endswith(".m4b")
        )
    return 0
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Celery task
|
|
# ------------------------------------------------------------
|
|
@celery_app.task(bind=True, queue="controller", ignore_result=True)
@logcall
def run_statuscheck(self, book_idx: str):
    """
    Run the final status check for one book and queue the m4btool task.

    Steps, in order:
      1. Read the audio_done / chapters_total counters through the repository
         and abort (return) when there are no chapters or audio is incomplete.
      2. Fetch the book record; abort when it is missing or when the book's
         output directory does not exist.
      3. Log the number of audio files found per volume (informational only;
         this never aborts).
      4. Set the book status to "m4b_running" and enqueue run_m4btool.

    The original author notes this task is triggered by the audio_completion
    quickcheck. Every abort path only logs and returns; nothing is raised here.
    """
    log(f"[STATUSCHECK] START book={book_idx}")

    # --------------------------------------------------------
    # 1. Counter sanity check (via repository)
    # --------------------------------------------------------
    done_count = get_audio_done(book_idx)
    total_count = get_chapters_total(book_idx)

    log(
        f"[STATUSCHECK] Counters book={book_idx} "
        f"audio_done={done_count} chapters_total={total_count}"
    )

    if total_count <= 0:
        log("[STATUSCHECK] No chapters_total → abort")
        return

    if done_count < total_count:
        # Not expected at this point, but checked defensively.
        log(
            "[STATUSCHECK] Audio not complete yet "
            f"({done_count}/{total_count}) → abort"
        )
        return

    # --------------------------------------------------------
    # 2. Book metadata (paths and m4b meta)
    # --------------------------------------------------------
    record = fetch_book(book_idx)
    if not record:
        log(f"[STATUSCHECK] Book not found in DB: {book_idx}")
        return

    # Fall back to the book index / "Unknown" when a field is missing or empty.
    title = record.get("title") or book_idx
    author = record.get("author") or "Unknown"
    m4b_meta = {"title": title, "author": author}

    # The book directory lives directly under the configured output root.
    output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    book_dir = os.path.join(output_root, title)

    if not os.path.isdir(book_dir):
        log(f"[STATUSCHECK] Book directory missing: {book_dir}")
        return

    # --------------------------------------------------------
    # 3. Filesystem report (light, never aborts)
    # --------------------------------------------------------
    volume_names = _detect_volumes(book_dir)

    if volume_names:
        for volume_name in volume_names:
            found = _count_audio_files(os.path.join(book_dir, volume_name, "Audio"))
            log(f"[STATUSCHECK] {volume_name}: {found} audio files detected")
    else:
        # m4btool is still queued below; per the original note it is expected
        # to no-op when there are no volumes.
        log(f"[STATUSCHECK] No volumes found for {book_idx}")

    # --------------------------------------------------------
    # 4. Queue m4btool (final pipeline step)
    # --------------------------------------------------------
    log(f"[STATUSCHECK] Queue m4btool for book={book_idx}")

    set_status(book_idx, "m4b_running")

    run_m4btool.delay(
        book_idx=book_idx,
        book_base=book_dir,
        meta=m4b_meta,
    )

    log(f"[STATUSCHECK] DONE book={book_idx}")
|