# NOTE: the lines below are repository web-viewer residue, not module source.
# You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# kmftools/bookscraper/scraper/services/audio_completion.py
#
# 95 lines
# 3.4 KiB

# ============================================================
# File: scraper/services/audio_completion.py
# Purpose:
# Orchestration hook after audio completion.
#
# Rules (STRICT):
# - ALWAYS read via get_book_state()
# - Use ONLY merged counters from repository
# - NO usage of derived status field
# - Completion rule:
#     audio_completed (= audio_done + audio_skipped) < chapters_total → NOT DONE
# ============================================================
from logbus.publisher import log
from scraper.logger_decorators import logcall
from db.repository import (
get_book_state,
try_trigger_statuscheck,
)
from scraper.services.status_check_service import StatusCheckService
from scraper.tasks.m4b_tasks import queue_m4b_for_book
@logcall
def trigger_audio_completion_check(book_idx: str):
    """
    Check whether a book's audio is finished and, if so, queue its m4b build.

    Per the module notes, this hook runs after inc_audio_done() or
    inc_audio_skipped().

    Steps:
      1. Read the book state through get_book_state().
      2. Stop unless audio_done + audio_skipped has reached a positive
         chapters_total.
      3. Run StatusCheckService and stop if audio files plus skipped
         chapters are fewer than the chapter text files it reports.
      4. Ask try_trigger_statuscheck() whether to proceed; a falsy answer
         is logged as "already triggered" (presumably a once-only guard;
         confirm in db.repository).
      5. Call queue_m4b_for_book().

    Every exception is logged and swallowed so the calling audio worker is
    never interrupted. The function returns None on all paths.
    """
    try:
        # --- 1. State as reported by the repository ---
        book_state = get_book_state(book_idx)
        chapters_total = int(book_state.get("chapters_total", 0))
        audio_done = int(book_state.get("audio_done", 0))
        audio_skipped = int(book_state.get("audio_skipped", 0))
        audio_completed = audio_done + audio_skipped

        log(
            f"[AUDIO-COMPLETION] book={book_idx} "
            f"audio_completed={audio_completed} chapters_total={chapters_total}"
        )

        # --- 2. Cheap counter check before touching the filesystem ---
        counters_complete = chapters_total > 0 and audio_completed >= chapters_total
        if not counters_complete:
            log(f"[AUDIO-COMPLETION] not yet complete for book={book_idx}")
            return

        # --- 3. Filesystem validation ---
        fs_report = StatusCheckService.run(book_idx).get("filesystem", {})
        audio_files = fs_report.get("audio_files", 0)
        chapters_txt = fs_report.get("chapters_txt", 0)

        # Skipped chapters have no audio file, so they are added to the
        # file count before comparing against the chapter text files.
        if audio_files + audio_skipped < chapters_txt:
            log(
                f"[AUDIO-COMPLETION] FS validation failed "
                f"(audio_files={audio_files}, skipped={audio_skipped}, txt={chapters_txt})"
            )
            return

        # --- 4 + 5. Guard, then final action ---
        # The guard is consulted only after the filesystem check has passed.
        if try_trigger_statuscheck(book_idx):
            log(f"[AUDIO-COMPLETION] DONE → queue m4b for book={book_idx}")
            queue_m4b_for_book(book_idx)
        else:
            log(f"[AUDIO-COMPLETION] statuscheck already triggered for {book_idx}")
    except Exception as exc:
        # Deliberate catch-all: this hook must never break audio workers.
        log(f"[AUDIO-COMPLETION][ERROR] book={book_idx} error={exc}")