Compare commits

..

No commits in common. 'main' and 'feature/bookstate-progress-fix' have entirely different histories.

@ -125,8 +125,7 @@ docker run \
``` ```
docker compose down --remove-orphans docker compose down
docker image prune -f
docker builder prune -af docker builder prune -af
docker volume prune -f docker volume prune -f
docker compose build --no-cache docker compose build --no-cache
@ -142,10 +141,6 @@ docker compose build --no-cache web && docker compose up web
docker compose build worker_download && docker compose up worker_download docker compose build worker_download && docker compose up worker_download
docker compose down --remove-orphans
docker compose build --no-cache worker_m4b
docker compose up -d worker_m4b
docker compose up web docker compose up web
docker compose build web docker compose build web
docker compose restart web docker compose restart web
@ -157,9 +152,3 @@ tar \
--exclude=".venv" \ --exclude=".venv" \
--exclude="venv" \ --exclude="venv" \
-czvf project.tar.gz . -czvf project.tar.gz .
docker compose down
docker image rm bookscraper-worker_m4b || true
docker builder prune -af
docker compose build --no-cache worker_m4b
docker compose up -d worker_m4b

@ -349,25 +349,6 @@ def logs():
return jsonify({"lines": new_lines, "last": new_last}) return jsonify({"lines": new_lines, "last": new_last})
from flask import render_template
from scraper.services.status_check_service import StatusCheckService
from logbus.publisher import log
from db.repository import get_book_state
@app.route("/inspect/statuscheck/<book_idx>", methods=["POST"])
@logcall
def inspect_statuscheck(book_idx):
    """
    Run the status check for a single book on demand.

    Responds with an empty body and HTTP 204 when the check completes.
    Any exception is logged and reported as a JSON error with HTTP 500.
    """
    try:
        StatusCheckService.run(book_idx)
    except Exception as exc:
        log(f"[STATUSCHECK] ERROR book_idx={book_idx}: {exc}")
        return jsonify({"error": str(exc)}), 500
    # Background action, no UI: there is nothing to render.
    return "", 204
# ===================================================== # =====================================================
# SECTION 4 — DEBUG ROUTES # SECTION 4 — DEBUG ROUTES
# ===================================================== # =====================================================

@ -9,7 +9,7 @@
# - Provide a clean API for tasks and Flask UI # - Provide a clean API for tasks and Flask UI
# ============================================================ # ============================================================
# ============================================================ # ============================================================
# UPDATED — canonical read model via get_book_state # File: db/repository.py (UPDATED for book_idx-only architecture)
# ============================================================ # ============================================================
from scraper.logger_decorators import logcall from scraper.logger_decorators import logcall
@ -17,6 +17,7 @@ from logbus.publisher import log
import redis import redis
import os import os
import time
# ============================================================ # ============================================================
# SQL low-level engines (snapshot storage) # SQL low-level engines (snapshot storage)
@ -28,6 +29,10 @@ from db.state_sql import (
sql_set_chapters_total, sql_set_chapters_total,
sql_register_book, sql_register_book,
sql_update_book, sql_update_book,
sql_inc_downloaded,
sql_inc_parsed,
sql_inc_audio_done,
sql_inc_audio_skipped,
) )
# ============================================================ # ============================================================
@ -44,34 +49,80 @@ from db.state_redis import (
) )
# ============================================================ # ============================================================
# Redis client (read-only for legacy + guards) # Redis setup for legacy progress paths
# ============================================================ # ============================================================
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0") REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True) _r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
# ============================================================ # ============================================================
# LEGACY PROGRESS (UI only, unchanged) # INTERNAL — LEGACY PROGRESS HELPERS (kept for UI)
# Keys remain: progress:{book_idx}:*
# ============================================================ # ============================================================
def _legacy_set_total(book_idx, total):
    """Write ``total`` to the legacy ``progress:{book_idx}:total`` key."""
    total_key = f"progress:{book_idx}:total"
    _r.set(total_key, total)
def _legacy_inc_completed(book_idx):
    """Increment the legacy ``progress:{book_idx}:completed`` counter by one."""
    completed_key = f"progress:{book_idx}:completed"
    _r.incr(completed_key)
def _legacy_inc_skipped(book_idx):
    """Increment the legacy ``progress:{book_idx}:skipped`` counter by one."""
    skipped_key = f"progress:{book_idx}:skipped"
    _r.incr(skipped_key)
def _legacy_inc_failed(book_idx):
    """Increment the legacy ``progress:{book_idx}:failed`` counter by one."""
    failed_key = f"progress:{book_idx}:failed"
    _r.incr(failed_key)
def _legacy_add_failed_chapter(book_idx, chapter, reason):
    """Append a ``Chapter <chapter>: <reason>`` line to the book's legacy failed list."""
    line = f"Chapter {chapter}: {reason}"
    failed_list_key = f"progress:{book_idx}:failed_list"
    _r.rpush(failed_list_key, line)
def _legacy_get_failed_list(book_idx):
    """Return every entry of the legacy ``progress:{book_idx}:failed_list`` list."""
    failed_list_key = f"progress:{book_idx}:failed_list"
    # The range 0..-1 selects the whole list.
    return _r.lrange(failed_list_key, 0, -1)
def _legacy_get_progress(book_idx): def _legacy_get_progress(book_idx):
total = int(_r.get(f"progress:{book_idx}:total") or 0)
completed = int(_r.get(f"progress:{book_idx}:completed") or 0)
skipped = int(_r.get(f"progress:{book_idx}:skipped") or 0)
failed = int(_r.get(f"progress:{book_idx}:failed") or 0)
abort = _r.exists(f"abort:{book_idx}") == 1
failed_list = _legacy_get_failed_list(book_idx)
return { return {
"book_idx": book_idx, "book_idx": book_idx,
"total": int(_r.get(f"progress:{book_idx}:total") or 0), "total": total,
"completed": int(_r.get(f"progress:{book_idx}:completed") or 0), "completed": completed,
"skipped": int(_r.get(f"progress:{book_idx}:skipped") or 0), "skipped": skipped,
"failed": int(_r.get(f"progress:{book_idx}:failed") or 0), "failed": failed,
"abort": _r.exists(f"abort:{book_idx}") == 1, "failed_list": failed_list,
"failed_list": _r.lrange(f"progress:{book_idx}:failed_list", 0, -1), "abort": abort,
} }
# ============================================================
# PUBLIC — PROGRESS API
# ============================================================
@logcall @logcall
def get_progress(book_idx): def get_progress(book_idx):
return _legacy_get_progress(book_idx) return _legacy_get_progress(book_idx)
@logcall
def add_failed_chapter(book_idx, chapter, reason):
    """Record a failed chapter for the book via the legacy progress helper."""
    _legacy_add_failed_chapter(book_idx=book_idx, chapter=chapter, reason=reason)
@logcall
def get_failed_list(book_idx):
    """Return the book's failed-chapter entries from the legacy progress helper."""
    failed_entries = _legacy_get_failed_list(book_idx)
    return failed_entries
# ============================================================ # ============================================================
# FETCH (SQLite snapshot) # FETCH OPERATIONS (SQLite snapshot)
# ============================================================ # ============================================================
@logcall @logcall
def fetch_book(book_idx): def fetch_book(book_idx):
@ -84,7 +135,7 @@ def fetch_all_books():
# ============================================================ # ============================================================
# INIT / UPDATE METADATA # INIT-FLOW (SQLite metadata only)
# ============================================================ # ============================================================
@logcall @logcall
def register_book( def register_book(
@ -96,9 +147,8 @@ def register_book(
cover_path=None, cover_path=None,
book_url=None, book_url=None,
): ):
sql_register_book(
book_idx, fields = {
{
"book_idx": book_idx, "book_idx": book_idx,
"title": title, "title": title,
"author": author, "author": author,
@ -108,10 +158,15 @@ def register_book(
"book_url": book_url, "book_url": book_url,
"chapters_total": 0, "chapters_total": 0,
"status": "registered", "status": "registered",
}, }
)
log(f"[DB] Registering new book_idx={book_idx} title='{title}'")
sql_register_book(book_idx, fields)
# ============================================================
# SCRAPE-FLOW UPDATE
# ============================================================
@logcall @logcall
def update_book_after_full_scrape( def update_book_after_full_scrape(
book_idx, book_idx,
@ -121,7 +176,9 @@ def update_book_after_full_scrape(
cover_url=None, cover_url=None,
chapters_total=None, chapters_total=None,
): ):
fields = {} fields = {}
if title is not None: if title is not None:
fields["title"] = title fields["title"] = title
if author is not None: if author is not None:
@ -134,187 +191,166 @@ def update_book_after_full_scrape(
fields["chapters_total"] = chapters_total fields["chapters_total"] = chapters_total
fields["status"] = "active" fields["status"] = "active"
log(f"[DB] update metadata for book_idx={book_idx}")
sql_update_book(book_idx, fields) sql_update_book(book_idx, fields)
# ============================================================ # ============================================================
# STATUS # ACTIVE BOOK LISTS
# ============================================================
@logcall
def get_registered_books():
    """Return all stored books except those whose status is ``"hidden"``."""
    books = sql_fetch_all_books()
    excluded_statuses = {"hidden"}
    log(f"[DB] Fetched all books for registered filter, total={len(books)}")

    visible = []
    for book in books:
        if book.get("status") in excluded_statuses:
            continue
        visible.append(book)
    return visible
@logcall
def get_active_books():
    """Return all stored books except those whose status is ``"hidden"`` or ``"done"``."""
    books = sql_fetch_all_books()
    excluded_statuses = {"hidden", "done"}
    log(f"[DB] Fetched all books for active filter, total={len(books)}")

    active = []
    for book in books:
        if book.get("status") in excluded_statuses:
            continue
        active.append(book)
    return active
# ============================================================
# STATUS MANAGEMENT
# ============================================================ # ============================================================
@logcall @logcall
def set_status(book_idx, status): def set_status(book_idx, status):
log(f"[DB] Setting status for {book_idx} to '{status}'")
redis_set_status(book_idx, status) redis_set_status(book_idx, status)
sql_set_status(book_idx, status) sql_set_status(book_idx, status)
# ============================================================ # ============================================================
# TOTALS # CHAPTER TOTALS
# ============================================================ # ============================================================
@logcall @logcall
def set_chapters_total(book_idx, total): def set_chapters_total(book_idx, total):
log(f"[DB] Setting chapter total for {book_idx} to {total}")
redis_set_chapters_total(book_idx, total) redis_set_chapters_total(book_idx, total)
sql_set_chapters_total(book_idx, total) sql_set_chapters_total(book_idx, total)
# _legacy_set_total(book_idx, total)
# ============================================================ # ============================================================
# COUNTERS — WRITE ONLY # COUNTERS — DOWNLOAD
# ============================================================ # ============================================================
@logcall @logcall
def inc_download_done(book_idx, amount=1): def inc_download_done(book_idx, amount=1):
log(f"[DB] Incrementing download done for {book_idx} by {amount}")
redis_inc_download_done(book_idx, amount) redis_inc_download_done(book_idx, amount)
# sql_inc_downloaded(book_idx, amount)
# _legacy_inc_completed(book_idx)
@logcall @logcall
def inc_download_skipped(book_idx, amount=1): def inc_download_skipped(book_idx, amount=1):
log(f"[DB] Incrementing download skipped for {book_idx} by {amount}")
redis_inc_download_skipped(book_idx, amount) redis_inc_download_skipped(book_idx, amount)
# _legacy_inc_skipped(book_idx)
# ============================================================
# COUNTERS — PARSE
# ============================================================
@logcall @logcall
def inc_parsed_done(book_idx, amount=1): def inc_parsed_done(book_idx, amount=1):
log(f"[DB] Incrementing parsed done for {book_idx} by {amount}")
redis_inc_parsed_done(book_idx, amount) redis_inc_parsed_done(book_idx, amount)
# sql_inc_parsed(book_idx, amount)
@logcall # ============================================================
def inc_audio_done(book_idx, amount=1): # COUNTERS — AUDIO
redis_inc_audio_done(book_idx, amount) # ============================================================
@logcall @logcall
def inc_audio_skipped(book_idx, amount=1): def inc_audio_skipped(book_idx, amount=1):
log(f"[DB] Incrementing audio skipped for {book_idx} by {amount}")
# sql_inc_audio_skipped(book_idx, amount)
redis_inc_audio_skipped(book_idx, amount) redis_inc_audio_skipped(book_idx, amount)
# ============================================================
# CANONICAL READ MODEL
# ============================================================
@logcall @logcall
def get_book_state(book_idx): def inc_audio_done(book_idx, amount=1):
""" log(f"[DB] Incrementing audio done for {book_idx} by {amount}")
Canonical merged read model. redis_inc_audio_done(book_idx, amount)
# sql_inc_audio_done(book_idx, amount)
Rules:
- SQL = snapshot baseline
- Redis = live counters
- merged = max(sql, redis)
- capped at chapters_total
"""
sqlite_row = sql_fetch_book(book_idx) or {}
redis_state = _r.hgetall(f"book:{book_idx}:state") or {}
def _int(v):
try:
return int(v)
except Exception:
return 0
chapters_total = _int(sqlite_row.get("chapters_total"))
# SQL snapshot
sql_downloaded = _int(sqlite_row.get("downloaded"))
sql_audio_done = _int(sqlite_row.get("audio_done"))
sql_audio_skipped = _int(sqlite_row.get("audio_skipped"))
# Redis live
redis_downloaded = _int(redis_state.get("chapters_download_done")) + _int(
redis_state.get("chapters_download_skipped")
)
redis_audio_done = _int(redis_state.get("audio_done"))
redis_audio_skipped = _int(redis_state.get("audio_skipped"))
# Merge
merged_downloaded = max(sql_downloaded, redis_downloaded)
merged_audio_done = max(sql_audio_done, redis_audio_done)
merged_audio_skipped = max(sql_audio_skipped, redis_audio_skipped)
if chapters_total > 0:
merged_downloaded = min(merged_downloaded, chapters_total)
merged_audio_done = min(merged_audio_done, chapters_total)
merged_audio_skipped = min(merged_audio_skipped, chapters_total)
audio_completed = merged_audio_done + merged_audio_skipped
# Build state
state = dict(sqlite_row)
state.update(
{
"downloaded": merged_downloaded,
"audio_done": merged_audio_done,
"audio_skipped": merged_audio_skipped,
"chapters_total": chapters_total,
}
)
# Derived status
status = sqlite_row.get("status") or "unknown"
if chapters_total > 0:
if merged_downloaded < chapters_total:
status = "downloading"
elif merged_downloaded == chapters_total and audio_completed < chapters_total:
status = "audio"
elif audio_completed >= chapters_total:
status = "done"
state["status"] = status
return state
# ============================================================ # ============================================================
# READ HELPERS (VIA get_book_state ONLY) # BACKWARDS COMPATIBILITY SHIMS
# These map the old API (book_id) to the new book_idx-only system
# ============================================================ # ============================================================
@logcall @logcall
def get_chapters_total(book_idx): def inc_downloaded(book_idx, amount=1):
return int(get_book_state(book_idx).get("chapters_total", 0)) return inc_download_done(book_idx, amount)
@logcall @logcall
def get_audio_done(book_idx): def inc_parsed(book_idx, amount=1):
return int(get_book_state(book_idx).get("audio_done", 0)) return inc_parsed_done(book_idx, amount)
@logcall @logcall
def get_audio_completed_total(book_idx): def inc_audio_done_legacy(book_idx, amount=1):
state = get_book_state(book_idx) return inc_audio_done(book_idx, amount)
return int(state.get("audio_done", 0)) + int(state.get("audio_skipped", 0))
# ============================================================ # ============================================================
# STATUSCHECK GUARD (INTENTIONAL DIRECT REDIS) # READ — DERIVED BOOK STATE
# ============================================================ # ============================================================
@logcall
def try_trigger_statuscheck(book_idx):
    """
    Try to claim the per-book statuscheck guard key.

    The key is written with ``nx=True``, i.e. only when it does not exist
    yet. Returns the truthiness of the Redis reply, so a caller can tell
    whether this call was the one that set the key.
    """
    guard_key = f"book:{book_idx}:statuscheck:triggered"
    reply = _r.set(guard_key, "1", nx=True)
    return bool(reply)
# ============================================================
# ACTIVE / REGISTERED BOOK LISTS (UI API)
# ============================================================
@logcall @logcall
def get_registered_books(): def get_book_state(book_idx):
"""
Books visible in the 'registered' list in the UI.
""" """
all_books = sql_fetch_all_books() Canonical read-model for a single book.
HIDDEN_STATES = {"hidden"}
return [b for b in all_books if b.get("status") not in HIDDEN_STATES]
Responsibilities:
- Read SQLite snapshot (static metadata)
- Read Redis live state (counters / status)
- Compute derived fields (NO UI logic)
@logcall Invariants:
def get_active_books(): - downloaded = chapters_download_done + chapters_download_skipped
""" """
Books currently active in the dashboard.
"""
all_books = sql_fetch_all_books()
HIDDEN_STATES = {"hidden", "done"}
return [b for b in all_books if b.get("status") not in HIDDEN_STATES]
# --- SQLite snapshot ---
sqlite_row = sql_fetch_book(book_idx) or {}
@logcall # --- Redis live state ---
def store_m4b_error(book_idx: str, volume: str, error_text: str): key = f"book:{book_idx}:state"
""" redis_state = _r.hgetall(key) or {}
Passive storage of m4b errors.
No logic, no retries, no state transitions. # Normalize numeric redis values
""" def _int(v):
key = f"book:{book_idx}:m4b:errors" try:
entry = f"{volume}: {error_text}" return int(v)
except Exception:
return 0
_r.rpush(key, entry) # --- primary counters ---
chapters_done = _int(redis_state.get("chapters_download_done"))
chapters_skipped = _int(redis_state.get("chapters_download_skipped"))
# --- derived counters ---
downloaded = chapters_done + chapters_skipped
# --- build canonical state ---
state = {}
# 1) start with SQLite snapshot
state.update(sqlite_row)
# 2) overlay Redis live fields
state.update(redis_state)
# 3) enforce derived invariants
state["downloaded"] = downloaded
return state

@ -149,22 +149,3 @@ services:
- .env - .env
command: celery -A celery_app worker -Q scraping -n scraping@%h -l INFO command: celery -A celery_app worker -Q scraping -n scraping@%h -l INFO
restart: "no" restart: "no"
# ----------------------------------------------------------
# M4B Worker (Finalization)
# ----------------------------------------------------------
worker_m4b:
build:
context: .
dockerfile: docker/Dockerfile.m4b
container_name: worker_m4b
command: celery -A celery_app worker -Q m4b -n m4b@%h -l INFO
depends_on:
redis:
condition: service_healthy
env_file:
- .env
volumes:
- .:/app
- /Users/peter/mnt/asustor/Sync/bookscraper/books:/Users/peter/mnt/asustor/Sync/bookscraper/books
- /Users/peter/mnt/asustor/Sync/bookscraper/db:/Users/peter/mnt/asustor/Sync/bookscraper/db
restart: "no"

@ -1,70 +0,0 @@
FROM debian:12
ENV DEBIAN_FRONTEND=noninteractive
# ----------------------------------------------------------
# System + PHP (PHP 8.2 native)
# ----------------------------------------------------------
RUN apt-get update && apt-get install -y \
ffmpeg \
curl \
ca-certificates \
bash \
php-cli \
php-intl \
php-json \
php-mbstring \
php-xml \
php-curl \
php-zip \
python3 \
python3-pip \
python3-venv \
\
# build deps for mp4v2
git \
build-essential \
autoconf \
automake \
libtool \
pkg-config \
&& rm -rf /var/lib/apt/lists/*
# ----------------------------------------------------------
# Python venv (PEP 668 compliant)
# ----------------------------------------------------------
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:/usr/local/bin:$PATH"
# ----------------------------------------------------------
# Build & install mp4v2 (mp4info)
# ----------------------------------------------------------
WORKDIR /tmp
RUN git clone https://github.com/sandreas/mp4v2 \
&& cd mp4v2 \
&& ./configure \
&& make -j$(nproc) \
&& make install \
&& echo "/usr/local/lib" > /etc/ld.so.conf.d/mp4v2.conf \
&& ldconfig \
&& cd / \
&& rm -rf /tmp/mp4v2
# ----------------------------------------------------------
# Install m4b-tool
# ----------------------------------------------------------
# --fail makes curl exit non-zero on an HTTP error, so a broken download
# aborts the build instead of installing an error page as the executable.
RUN curl -fL https://github.com/sandreas/m4b-tool/releases/latest/download/m4b-tool.phar \
    -o /usr/local/bin/m4b-tool \
    && chmod +x /usr/local/bin/m4b-tool
# ----------------------------------------------------------
# App
# ----------------------------------------------------------
WORKDIR /app
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
COPY . /app
CMD ["bash"]

@ -1 +0,0 @@
Subproject commit 480a73324f53d0d24bea4931c3902097f8e2a663

Binary file not shown.

@ -5,6 +5,7 @@
import os import os
import stat import stat
from logbus.publisher import log from logbus.publisher import log
from scraper.logger_decorators import logcall from scraper.logger_decorators import logcall
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates") TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")
@ -44,40 +45,26 @@ def detect_volumes(book_base: str):
# ------------------------------------------------------------ # ------------------------------------------------------------
def build_merge_block(title: str, author: str, volumes): def build_merge_block(title: str, author: str, volumes):
lines = [] lines = []
# --------------------------------------------------------
# Normalize input (defensive)
# --------------------------------------------------------
title = (title or "").strip()
author = (author or "").strip()
total_vols = len(volumes) total_vols = len(volumes)
# Padding-regel:
# - altijd minimaal 2 (01, 02)
# - 3 bij >=100
if total_vols >= 100: if total_vols >= 100:
pad = 3 pad = 3
else: elif total_vols >= 10:
pad = 2 pad = 2
else:
pad = 0
for num, dirname in volumes: for num, dirname in volumes:
vol_num = f"{num:0{pad}d}" # voor filename if pad > 0:
series_part = f"{num:0{pad}d}" # voor series-part (string!) vol_num = f"{num:0{pad}d}"
else:
vol_num = str(num)
line = ( line = (
f"m4b-tool merge --jobs=4 " f'm4b-tool merge --jobs=4 --writer="{author}" '
f'--writer="{author}" ' f'--albumartist="{author}" --album="{title}" '
f'--sortalbum="{title}" ' f'--name="{title}" --output-file="{title}-{vol_num}.m4b" '
f'--albumartist="{author}" '
f'--album="{title}" '
f'--name="{title}" '
f'--series="{title}" '
f'--series-part="{series_part}" '
f'--output-file="{title}-{vol_num}.m4b" '
f'"{dirname}" -vvv' f'"{dirname}" -vvv'
) )
lines.append(line) lines.append(line)
if not lines: if not lines:
@ -89,14 +76,7 @@ def build_merge_block(title: str, author: str, volumes):
# ------------------------------------------------------------ # ------------------------------------------------------------
# Main generator # Main generator
# ------------------------------------------------------------ # ------------------------------------------------------------
@logcall
def generate_all_scripts(book_base: str, title: str, author: str): def generate_all_scripts(book_base: str, title: str, author: str):
# --------------------------------------------------------
# Defensive normalize
# --------------------------------------------------------
title = (title or "").strip()
author = (author or "").strip()
log(f"[SCRIPTGEN] Generating scripts in {book_base}") log(f"[SCRIPTGEN] Generating scripts in {book_base}")
# Load templates # Load templates

@ -1,94 +0,0 @@
# ============================================================
# File: scraper/services/audio_completion.py
# Purpose:
# Orchestration hook after audio completion.
#
# Rules (STRICT):
# - ALWAYS read via get_book_state()
# - Use ONLY merged counters from repository
# - NO usage of derived status field
# - Completion rule:
# audio_completed < chapters_total → NOT DONE
# ============================================================
from logbus.publisher import log
from scraper.logger_decorators import logcall
from db.repository import (
get_book_state,
try_trigger_statuscheck,
)
from scraper.services.status_check_service import StatusCheckService
from scraper.tasks.m4b_tasks import queue_m4b_for_book
@logcall
def trigger_audio_completion_check(book_idx: str):
    """
    Decide whether a book's audio is complete and, if so, queue the m4b job.

    Steps, in order:
      1. Read the book state from the repository.
      2. Stop early while done + skipped audio is below the chapter total.
      3. Run the filesystem status check and compare it with the txt count.
      4. Claim the statuscheck guard so the final action runs only once.
      5. Queue the m4b job for the book.

    Every exception is logged and swallowed, so a failure here never
    propagates to the calling audio worker.
    """
    try:
        # Step 1: state from the repository, normalised to ints.
        state = get_book_state(book_idx)
        chapters_total, audio_done, audio_skipped = (
            int(state.get(field, 0))
            for field in ("chapters_total", "audio_done", "audio_skipped")
        )
        audio_completed = audio_done + audio_skipped

        log(
            f"[AUDIO-COMPLETION] book={book_idx} "
            f"audio_completed={audio_completed} chapters_total={chapters_total}"
        )

        # Step 2: fast reject based on counters only.
        counters_complete = chapters_total > 0 and audio_completed >= chapters_total
        if not counters_complete:
            log(f"[AUDIO-COMPLETION] not yet complete for book={book_idx}")
            return

        # Step 3: filesystem validation via the status check.
        fs = StatusCheckService.run(book_idx).get("filesystem", {})
        audio_files = fs.get("audio_files", 0)
        chapters_txt = fs.get("chapters_txt", 0)

        if audio_files + audio_skipped < chapters_txt:
            log(
                f"[AUDIO-COMPLETION] FS validation failed "
                f"(audio_files={audio_files}, skipped={audio_skipped}, txt={chapters_txt})"
            )
            return

        # Steps 4 and 5: the guard is claimed only after the filesystem
        # confirmed completion; the winner queues the m4b job.
        if try_trigger_statuscheck(book_idx):
            log(f"[AUDIO-COMPLETION] DONE → queue m4b for book={book_idx}")
            queue_m4b_for_book(book_idx)
        else:
            log(f"[AUDIO-COMPLETION] statuscheck already triggered for {book_idx}")

    except Exception as exc:
        # Must never break the audio workers.
        log(f"[AUDIO-COMPLETION][ERROR] book={book_idx} error={exc}")

@ -1,135 +0,0 @@
# ============================================================
# File: scraper/services/status_check_service.py
# Purpose:
# Handmatige, idempotente statuscheck per boek.
#
# Bepaalt op basis van het filesystem:
# - aantal gedownloade chapters (.txt)
# - aantal gegenereerde audiofiles (.m4b)
#
# En schrijft deze gevalideerde werkelijkheid naar SQL.
#
# LET OP:
# - Geen Redis
# - Geen Celery
# - Geen status-transities
# - Geen pipeline-logica
# ============================================================
import os
from datetime import datetime
from typing import Dict, Any
from logbus.publisher import log
from scraper.logger_decorators import logcall
from db.state_sql import sql_fetch_book, sql_update_book
class StatusCheckService:
    """
    Status check based on the filesystem.

    The files on disk are treated as the single source of truth; the counts
    found there are written to the book's SQL row.
    """

    @staticmethod
    def _scan_book_dir(book_dir: str) -> Dict[str, int]:
        """
        Count volumes, chapter text files and audio files below ``book_dir``.

        Only directories whose name starts with ``volume_`` (case-insensitive)
        are considered. Inside each volume, ``.txt`` files count as chapters
        and ``.m4b`` files inside an ``Audio`` subdirectory count as audio.
        """
        counts = {"volumes": 0, "chapters_txt": 0, "audio_files": 0}

        for entry in os.listdir(book_dir):
            if not entry.lower().startswith("volume_"):
                continue

            volume_path = os.path.join(book_dir, entry)
            # Check for a real directory BEFORE counting, so a stray file
            # named "volume_*" does not inflate the volume count.
            if not os.path.isdir(volume_path):
                continue
            counts["volumes"] += 1

            counts["chapters_txt"] += sum(
                1
                for fname in os.listdir(volume_path)
                if fname.lower().endswith(".txt")
            )

            audio_dir = os.path.join(volume_path, "Audio")
            if os.path.isdir(audio_dir):
                counts["audio_files"] += sum(
                    1
                    for fname in os.listdir(audio_dir)
                    if fname.lower().endswith(".m4b")
                )

        return counts

    @staticmethod
    @logcall
    def run(book_idx: str) -> Dict[str, Any]:
        """
        Run the status check for one book.

        Returns an inspectable dict with:
          - the filesystem counts
          - the SQL row before and after the update

        Raises:
            ValueError: if the book is not found in SQL, or its row has no
                title to derive the output directory from.
        """
        # ----------------------------------------------------
        # 1. SQL fetch (does the book exist?)
        # ----------------------------------------------------
        sql_before = sql_fetch_book(book_idx)
        if not sql_before:
            raise ValueError(f"[STATUSCHECK] Book not found in SQL: {book_idx}")

        # ----------------------------------------------------
        # 2. Determine the filesystem root
        # ----------------------------------------------------
        output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
        title = sql_before.get("title")
        if not title:
            # Without a title the book directory cannot be derived; fail
            # with a clear message instead of a TypeError from os.path.join
            # (or a scan of the output root itself for an empty title).
            raise ValueError(f"[STATUSCHECK] Book has no title in SQL: {book_idx}")
        book_dir = os.path.join(output_root, title)
        exists = os.path.isdir(book_dir)

        # ----------------------------------------------------
        # 3. Scan volumes (all counts stay 0 when the directory is missing)
        # ----------------------------------------------------
        if exists:
            counts = StatusCheckService._scan_book_dir(book_dir)
        else:
            log(
                f"[STATUSCHECK] No output directory for book_idx={book_idx} : title='{title}')"
            )
            counts = {"volumes": 0, "chapters_txt": 0, "audio_files": 0}

        volumes = counts["volumes"]
        chapters_txt = counts["chapters_txt"]
        audio_files = counts["audio_files"]

        # ----------------------------------------------------
        # 4. SQL update (snapshot)
        # ----------------------------------------------------
        now = datetime.utcnow().isoformat(timespec="seconds")

        update_fields = {
            "downloaded": chapters_txt,
            "audio_done": audio_files,
            "last_update": now,
        }

        sql_update_book(book_idx, update_fields)
        sql_after = sql_fetch_book(book_idx)

        # ----------------------------------------------------
        # 5. Result for inspect/debug
        # ----------------------------------------------------
        result = {
            "book_idx": book_idx,
            "filesystem": {
                "book_dir": book_dir,
                "exists": exists,
                "volumes": volumes,
                "chapters_txt": chapters_txt,
                "audio_files": audio_files,
            },
            "sql_before": sql_before,
            "sql_after": sql_after,
            "notes": [],
        }

        log(
            f"[STATUSCHECK] book_idx={book_idx} "
            f"chapters={chapters_txt} audio={audio_files}"
        )

        return result

@ -17,7 +17,6 @@ from scraper.abort import abort_requested
from scraper.logger_decorators import logcall from scraper.logger_decorators import logcall
from redis import Redis from redis import Redis
from urllib.parse import urlparse from urllib.parse import urlparse
from scraper.services.audio_completion import trigger_audio_completion_check
# NEW — unified repository façade # NEW — unified repository façade
from db.repository import ( from db.repository import (
@ -167,7 +166,6 @@ def generate_audio(
log(f"[AUDIO] CH{chapter_number}: Already exists → skip") log(f"[AUDIO] CH{chapter_number}: Already exists → skip")
redis_client.delete(slot_key) redis_client.delete(slot_key)
inc_audio_skipped(book_id) inc_audio_skipped(book_id)
trigger_audio_completion_check(book_id)
return return
# ------------------------------------------------------------ # ------------------------------------------------------------
@ -193,8 +191,6 @@ def generate_audio(
# NEW — repository façade # NEW — repository façade
inc_audio_done(book_id) inc_audio_done(book_id)
trigger_audio_completion_check(book_id)
log(f"trigger_audio_completion_check ")
log(f"[AUDIO]({HOST}) CH{chapter_number}: Completed") log(f"[AUDIO]({HOST}) CH{chapter_number}: Completed")
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:

@ -1,132 +0,0 @@
# ============================================================
# File: scraper/tasks/m4b_tasks.py
# ============================================================
import os
import subprocess
from typing import List
from celery_app import celery_app
from logbus.publisher import log
from scraper.logger_decorators import logcall
from db.repository import fetch_book, store_m4b_error
from scraper.scriptgen import build_merge_block
# ------------------------------------------------------------
# Helper: detect volumes (UNCHANGED)
# ------------------------------------------------------------
def detect_volumes(book_base: str) -> List[str]:
    """
    Return the sorted names of the volume directories inside ``book_base``.

    An entry qualifies when its name starts with ``volume_``
    (case-insensitive) and it is a directory.
    """
    return sorted(
        name
        for name in os.listdir(book_base)
        if name.lower().startswith("volume_")
        and os.path.isdir(os.path.join(book_base, name))
    )
# ------------------------------------------------------------
# Celery task
# ------------------------------------------------------------
@celery_app.task(bind=True, queue="m4b", ignore_result=True)
@logcall
def run_m4btool(self, book_idx: str):
    """
    Run the m4b-tool merge command for every volume of a book.

    The book row is loaded from SQL, its volume directories are detected
    under ``$BOOKSCRAPER_OUTPUT_DIR/<title>``, and one merge command per
    volume is taken from ``build_merge_block``. Volumes without an
    ``Audio`` directory are skipped. A failing command is logged and stored
    through ``store_m4b_error``; the remaining volumes are still processed.
    """
    log(f"[M4B] START book_idx={book_idx}")

    book = fetch_book(book_idx)
    if not book:
        log(f"[M4B] Book not found in SQL: book_idx={book_idx}")
        return

    title = book.get("title", book_idx)
    author = book.get("author", "Unknown")

    book_base = os.path.join(os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output"), title)
    log(f"[M4B] Book base directory: {book_base}")

    if not os.path.isdir(book_base):
        log(f"[M4B] Book directory missing: {book_base}")
        return

    volumes = detect_volumes(book_base)
    if not volumes:
        log(f"[M4B] No volumes found for book_idx={book_idx}")
        return

    log(f"[M4B] Volumes detected: {volumes}")

    # --------------------------------------------------------
    # Build the commands via scriptgen: volumes are numbered from 1 and the
    # returned block is split into one command per "&&"-separated part.
    # --------------------------------------------------------
    numbered_volumes = list(enumerate(volumes, start=1))
    merge_block = build_merge_block(title, author, numbered_volumes)

    commands = []
    for part in merge_block.split("&&"):
        stripped = part.strip()
        if stripped:
            commands.append(stripped)

    # Volumes and commands are paired by position.
    for volume, cmd in zip(volumes, commands):
        if not os.path.isdir(os.path.join(book_base, volume, "Audio")):
            log(f"[M4B] SKIP {volume}: no Audio directory")
            continue

        log(f"[M4B] Running for volume={volume}")
        log(f"[M4B] CMD: {cmd}")

        try:
            # NOTE(review): the command string embeds title/author and runs
            # through the shell; confirm these values are trusted or quoted.
            result = subprocess.run(
                cmd,
                cwd=book_base,
                shell=True,
                capture_output=True,
                text=True,
                check=True,
            )

            if result.stdout:
                log(f"[M4B][STDOUT] {result.stdout}")

        except subprocess.CalledProcessError as exc:
            log(f"[M4B][FAILED] volume={volume}")

            if exc.stdout:
                log(f"[M4B][STDOUT] {exc.stdout}")
            if exc.stderr:
                log(f"[M4B][STDERR] {exc.stderr}")

            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=exc.stderr or str(exc),
            )

        except Exception as exc:
            log(f"[M4B][UNEXPECTED ERROR] volume={volume}: {exc}")
            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=str(exc),
            )

    log(f"[M4B] FINISHED book_idx={book_idx}")
# ------------------------------------------------------------
# Orchestration helper (UNCHANGED)
# ------------------------------------------------------------
@logcall
def queue_m4b_for_book(book_idx: str):
    """
    Enqueue the m4b-tool task for one book on the "m4b" queue.

    The task is addressed by its dotted name through ``send_task`` with
    ``book_idx`` as its only positional argument.
    """
    log(f"[M4B] Queuing m4b-tool for book_idx={book_idx}")
    task_name = "scraper.tasks.m4b_tasks.run_m4btool"
    celery_app.send_task(task_name, args=[book_idx], queue="m4b")

@ -1,149 +0,0 @@
# ============================================================
# File: scraper/tasks/statuscheck.py
# Purpose:
# Final status check after audio completion.
#
# Responsibilities:
# - Verify Redis counters (sanity check)
# - Verify filesystem (Audio files present)
# - Queue m4btool task
#
# Design rules:
# - Book-scope ONLY
# - No direct Redis usage
# - Repository is the single source of truth
# - Idempotent, defensive, non-blocking
# ============================================================
import os
from celery_app import celery_app
from logbus.publisher import log
from scraper.logger_decorators import logcall
from db.repository import (
get_audio_done,
get_chapters_total,
set_status,
fetch_book,
)
from scraper.tasks.m4b_tasks import run_m4btool
# ------------------------------------------------------------
# Helpers
# ------------------------------------------------------------
# `log` is the message-logging function, not a decorator; wrapping the helper
# with it would replace the helper by log()'s return value. `logcall` is the
# call-logging decorator used by the other functions in this module.
@logcall
def _detect_volumes(book_base: str):
    """
    Return sorted list of Volume_XXX directories.

    Only directory names (not full paths) that start with "volume_"
    (case-insensitive) are returned.
    """
    vols = []
    for name in os.listdir(book_base):
        if name.lower().startswith("volume_"):
            full = os.path.join(book_base, name)
            if os.path.isdir(full):
                vols.append(name)
    vols.sort()
    return vols
@logcall
def _count_audio_files(audio_dir: str) -> int:
    """
    Return how many entries in ``audio_dir`` have a name ending in ".m4b".

    The suffix is compared case-insensitively. A path that is not an
    existing directory yields 0.
    """
    if os.path.isdir(audio_dir):
        return sum(
            1 for entry in os.listdir(audio_dir) if entry.lower().endswith(".m4b")
        )
    return 0
# ------------------------------------------------------------
# Celery task
# ------------------------------------------------------------
@celery_app.task(bind=True, queue="controller", ignore_result=True)
@logcall
def run_statuscheck(self, book_idx: str):
    """
    Final statuscheck before m4btool execution.

    Steps:
      1. Compare the audio_done and chapters_total counters read through
         the repository; abort when audio is not complete.
      2. Resolve the book directory from the book's title.
      3. Log the number of .m4b files per volume (informational only; the
         counts never block the pipeline).
      4. Set the status to "m4b_running" and queue ``run_m4btool``.

    Args:
        book_idx: Identifier of the book to check.

    Returns:
        None. Every abort path logs its reason and returns early.
    """
    log(f"[STATUSCHECK] START book={book_idx}")

    # --------------------------------------------------------
    # 1. Counter sanity check (via repository)
    # --------------------------------------------------------
    audio_done = get_audio_done(book_idx)
    chapters_total = get_chapters_total(book_idx)
    log(
        f"[STATUSCHECK] Counters book={book_idx} "
        f"audio_done={audio_done} chapters_total={chapters_total}"
    )

    if chapters_total <= 0:
        log("[STATUSCHECK] No chapters_total → abort")
        return

    if audio_done < chapters_total:
        # Defensive: should not happen, but never assume
        log(
            f"[STATUSCHECK] Audio not complete yet "
            f"({audio_done}/{chapters_total}) → abort"
        )
        return

    # --------------------------------------------------------
    # 2. Fetch book metadata (for paths)
    # --------------------------------------------------------
    book = fetch_book(book_idx)
    if not book:
        log(f"[STATUSCHECK] Book not found in DB: {book_idx}")
        return

    title = book.get("title") or book_idx

    # Base output directory
    root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    book_base = os.path.join(root, title)
    if not os.path.isdir(book_base):
        log(f"[STATUSCHECK] Book directory missing: {book_base}")
        return

    # --------------------------------------------------------
    # 3. Filesystem validation (light, non-blocking)
    # --------------------------------------------------------
    volumes = _detect_volumes(book_base)
    if not volumes:
        log(f"[STATUSCHECK] No volumes found for {book_idx}")
        # Still allow m4btool to decide (it will no-op)
    else:
        for vol in volumes:
            audio_dir = os.path.join(book_base, vol, "Audio")
            count = _count_audio_files(audio_dir)
            log(f"[STATUSCHECK] {vol}: " f"{count} audio files detected")

    # --------------------------------------------------------
    # 4. Queue m4btool (final pipeline step)
    # --------------------------------------------------------
    log(f"[STATUSCHECK] Queue m4btool for book={book_idx}")
    set_status(book_idx, "m4b_running")

    # run_m4btool's signature is (self, book_idx): it looks up title, author
    # and the book directory itself. Passing extra keywords (book_base, meta)
    # would make the queued task fail on an unexpected keyword argument.
    run_m4btool.delay(book_idx=book_idx)

    log(f"[STATUSCHECK] DONE book={book_idx}")

@ -1,11 +1,8 @@
/* ======================================================================= /* =======================================================================
File: static/css/bookcard.css File: static/css/bookcard.css
Purpose: Purpose:
Styling voor registered book cards: All styling for registered book cards (book-card) +
- status kleuren status colors + start/abort buttons + progress bars
- badges
- start/abort/statuscheck
- progress bars
======================================================================= */ ======================================================================= */
/* ----------------------------------------------------------------------- /* -----------------------------------------------------------------------
@ -20,7 +17,7 @@
} }
/* ----------------------------------------------------------------------- /* -----------------------------------------------------------------------
BOOK CARD BASE BOOK CARD
----------------------------------------------------------------------- */ ----------------------------------------------------------------------- */
.book-card { .book-card {
@ -39,28 +36,34 @@
} }
/* ----------------------------------------------------------------------- /* -----------------------------------------------------------------------
STATUS COLORS (BOOK CARD BORDER) STATUS COLORS
----------------------------------------------------------------------- */ ----------------------------------------------------------------------- */
/* Downloading / actief bezig */ .book-card.processing {
border-color: #007aff;
box-shadow: 0 0 6px rgba(0, 122, 255, 0.35);
}
.book-card.downloading { .book-card.downloading {
border-color: #ff9500; border-color: #ff9500;
box-shadow: 0 0 6px rgba(255, 149, 0, 0.35); box-shadow: 0 0 6px rgba(255, 149, 0, 0.35);
} }
/* Audio fase */ .book-card.parsing {
border-color: #ffcc00;
box-shadow: 0 0 6px rgba(255, 204, 0, 0.35);
}
.book-card.audio { .book-card.audio {
border-color: #ffca28; border-color: #34c759;
box-shadow: 0 0 6px rgba(255, 202, 40, 0.35); box-shadow: 0 0 6px rgba(52, 199, 89, 0.35);
} }
/* Volledig klaar */ .book-card.completed {
.book-card.done { border-color: #34c759;
border: 2px solid #4caf50; box-shadow: 0 0 6px rgba(52, 199, 89, 0.35);
box-shadow: 0 0 6px rgba(76, 175, 80, 0.35);
} }
/* Afgebroken */
.book-card.aborted { .book-card.aborted {
border-color: #ff3b30; border-color: #ff3b30;
box-shadow: 0 0 6px rgba(255, 59, 48, 0.35); box-shadow: 0 0 6px rgba(255, 59, 48, 0.35);
@ -185,21 +188,6 @@
background: #555; background: #555;
} }
/* Statuscheck */
.statuscheck-btn {
background-color: #444;
color: #fff;
border: 1px solid #666;
margin-left: 4px;
padding: 4px 8px;
border-radius: 6px;
font-size: 12px;
cursor: pointer;
}
.statuscheck-btn:hover {
background-color: #333;
}
/* ----------------------------------------------------------------------- /* -----------------------------------------------------------------------
PROGRESS (FULL WIDTH) PROGRESS (FULL WIDTH)
----------------------------------------------------------------------- */ ----------------------------------------------------------------------- */
@ -213,7 +201,7 @@
} }
.progress-row { .progress-row {
margin-bottom: 4px; margin-bottom: 10px;
} }
.progress-label { .progress-label {
@ -237,12 +225,12 @@
transition: width 0.4s ease; transition: width 0.4s ease;
} }
/* Download */ /* Download = blauw */
.progressbar-fill.download { .progressbar-fill.download {
background: #2196f3; background: #2196f3;
} }
/* Audio */ /* Audio = groen */
.progressbar-fill.audio { .progressbar-fill.audio {
background: #4caf50; background: #4caf50;
} }
@ -261,50 +249,3 @@
text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6); text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);
pointer-events: none; pointer-events: none;
} }
/* -----------------------------------------------------------------------
STATUS BADGE
----------------------------------------------------------------------- */
.status-badge {
display: inline-block;
margin-bottom: 6px;
padding: 2px 8px;
font-size: 11px;
font-weight: 600;
border-radius: 10px;
text-transform: uppercase;
letter-spacing: 0.5px;
cursor: default;
}
/* DONE */
.status-badge.status-done {
background-color: #e6f4ea;
color: #2e7d32;
border: 1px solid #4caf50;
}
/* AUDIO */
.status-badge.status-audio {
background-color: #fff8e1;
color: #8d6e00;
border: 1px solid #ffca28;
}
/* DOWNLOADING */
.status-badge.status-downloading {
background-color: #e3f2fd;
color: #1565c0;
border: 1px solid #42a5f5;
}
/* Statuscheck */
.icon-statuscheck {
background: #444;
}
.icon-statuscheck:hover {
background: #333;
transform: scale(1.05);
}

@ -84,8 +84,9 @@
.progress-box { .progress-box {
background: #fafafa; background: #fafafa;
border: 1px solid #ddd; border: 1px solid #ddd;
padding: 8px; padding: 18px;
border-radius: 6px; border-radius: 6px;
width: 100%;
} }
.progress-header h2 { .progress-header h2 {
@ -294,19 +295,3 @@
.dropdown-menu li a:hover { .dropdown-menu li a:hover {
background: #f0f0f0; background: #f0f0f0;
} }
table.kv {
border-collapse: collapse;
margin-bottom: 16px;
}
table.kv th {
text-align: left;
padding-right: 12px;
color: #777;
font-weight: normal;
}
table.kv td {
font-weight: 500;
}

@ -52,7 +52,6 @@ function updateSingleBookCard(card, state) {
console.log("[BOOKCARD] updateSingleBookCard", state.book_idx); console.log("[BOOKCARD] updateSingleBookCard", state.book_idx);
updateStatus(card, state); updateStatus(card, state);
updateStatusBadge(card, state);
updateButtons(card, state); updateButtons(card, state);
updateProgress(card, state); updateProgress(card, state);
} }
@ -65,21 +64,6 @@ function updateStatus(card, state) {
console.log("[BOOKCARD][STATUS]", state.book_idx, "→", state.status); console.log("[BOOKCARD][STATUS]", state.book_idx, "→", state.status);
card.className = `book-card ${state.status || ""}`; card.className = `book-card ${state.status || ""}`;
} }
/**
 * Refresh the status badge inside a book card.
 *
 * Sets the badge text to the upper-cased status, its class to
 * "status-badge status-<status>", and its tooltip to the matching
 * description (empty when the status has none). Does nothing when the
 * card has no ".status-badge" element.
 */
function updateStatusBadge(card, state) {
  const badgeEl = card.querySelector(".status-badge");
  if (badgeEl) {
    const tooltips = {
      downloading: "Bezig met downloaden",
      audio: "Downloads compleet, audio wordt gegenereerd",
      done: "Alle chapters en audio zijn compleet",
    };
    const key = (state.status || "").toLowerCase();

    badgeEl.textContent = key.toUpperCase();
    badgeEl.className = `status-badge status-${key}`;
    badgeEl.title = tooltips[key] || "";
  }
}
/* ============================================================ /* ============================================================
BUTTONS BUTTONS

@ -31,20 +31,6 @@ component) ============================================================ #}
<!-- META --> <!-- META -->
<div class="book-meta"> <div class="book-meta">
<!-- STATUS BADGE -->
{% if b.status %}
<span
class="status-badge status-{{ b.status }}"
title="
{% if b.status == 'done' %}Alle chapters en audio zijn compleet{% endif %}
{% if b.status == 'audio' %}Downloads compleet, audio wordt nog gegenereerd{% endif %}
{% if b.status == 'downloading' %}Bezig met downloaden{% endif %}
"
>
{{ b.status | upper }}
</span>
{% endif %}
<div class="book-title" data-field="title">{{ b.title }}</div> <div class="book-title" data-field="title">{{ b.title }}</div>
<div class="book-author" data-field="author">{{ b.author }}</div> <div class="book-author" data-field="author">{{ b.author }}</div>
<div class="book-created"> <div class="book-created">
@ -68,20 +54,6 @@ component) ============================================================ #}
<i class="fa-solid fa-stop"></i> <i class="fa-solid fa-stop"></i>
</button> </button>
</form> </form>
<form
method="post"
action="/inspect/statuscheck/{{ b.book_idx }}"
style="display: inline-block"
>
<button
type="submit"
class="icon-btn icon-statuscheck"
title="Herbereken status op basis van bestanden"
>
<i class="fa-solid fa-magnifying-glass-chart"></i>
</button>
</form>
</div> </div>
</div> </div>

@ -22,11 +22,25 @@
<hr /> <hr />
{% include "components/registered_books.html" %}
<hr />
<!-- =========================================================== <!-- ===========================================================
BOOK LIST BOOK LIST
=========================================================== --> =========================================================== -->
<section class="dashboard-section">
<h2>Actieve boeken</h2>
{% if books and books|length > 0 %}
<div id="book-list" class="book-list">
{% for book in books %} {% include "components/book_list_item.html" %} {%
endfor %}
</div>
{% else %}
<div id="book-list" class="book-list-empty">Geen actieve boeken.</div>
{% endif %}
</section>
{% include "components/registered_books.html" %}
<hr /> <hr />
<!-- =========================================================== <!-- ===========================================================

@ -1,115 +0,0 @@
{% extends "layout.html" %} {% block content %}
<h2>Statuscheck Inspect</h2>
{% if error %}
<div class="error"><strong>Fout:</strong> {{ error }}</div>
{% else %}
<!-- ===================================================== -->
<!-- BOEK -->
<!-- ===================================================== -->
<h3>Boek</h3>
<table class="kv">
<tr>
<th>Book idx</th>
<td>{{ result.book_idx }}</td>
</tr>
<tr>
<th>Pad</th>
<td>{{ result.filesystem.book_dir }}</td>
</tr>
<tr>
<th>Bestaat</th>
<td>{{ result.filesystem.exists }}</td>
</tr>
</table>
<!-- ===================================================== -->
<!-- FILESYSTEM -->
<!-- ===================================================== -->
<h3>Filesystem (source of truth)</h3>
<table class="kv">
<tr>
<th>Volumes</th>
<td>{{ result.filesystem.volumes }}</td>
</tr>
<tr>
<th>Chapters (.txt)</th>
<td>{{ result.filesystem.chapters_txt }}</td>
</tr>
<tr>
<th>Audio (.m4b)</th>
<td>{{ result.filesystem.audio_files }}</td>
</tr>
</table>
<!-- ===================================================== -->
<!-- SQL -->
<!-- ===================================================== -->
<h3>SQL snapshot</h3>
<h4>Voor</h4>
<table class="kv">
<tr>
<th>Downloaded</th>
<td>{{ result.sql_before.downloaded }}</td>
</tr>
<tr>
<th>Audio done</th>
<td>{{ result.sql_before.audio_done }}</td>
</tr>
<tr>
<th>Status</th>
<td>{{ result.sql_before.status }}</td>
</tr>
</table>
<h4>Na</h4>
<table class="kv">
<tr>
<th>Downloaded</th>
<td>{{ result.sql_after.downloaded }}</td>
</tr>
<tr>
<th>Audio done</th>
<td>{{ result.sql_after.audio_done }}</td>
</tr>
<tr>
<th>Last update</th>
<td>{{ result.sql_after.last_update }}</td>
</tr>
</table>
<!-- ===================================================== -->
<!-- REPOSITORY -->
<!-- ===================================================== -->
<h3>Repository merged state (UI input)</h3>
<table class="kv">
<tr>
<th>Downloaded</th>
<td>{{ repo_state.downloaded }}</td>
</tr>
<tr>
<th>Audio done</th>
<td>{{ repo_state.audio_done }}</td>
</tr>
<tr>
<th>Chapters total</th>
<td>{{ repo_state.chapters_total }}</td>
</tr>
</table>
<details>
<summary>Raw repository state</summary>
<pre>{{ repo_state | tojson(indent=2) }}</pre>
</details>
{% endif %}
<hr />
<a href="/dashboard">← Terug naar dashboard</a>
{% endblock %}

@ -1,13 +0,0 @@
#!/bin/sh
# mp4info shim for m4b-tool (ffprobe-based)
#
# Usage: mp4info <file>
# Prints the container duration reported by ffprobe, rounded to whole seconds.
# Exits non-zero when no file is given, when ffprobe fails, or when ffprobe
# reports no duration.

if [ -z "$1" ]; then
    echo "Usage: mp4info <file>" >&2
    exit 1
fi

# Capture ffprobe's output before rounding: in a plain pipeline the exit
# status is awk's, so a failing ffprobe would be reported as success.
duration=$(ffprobe -v error \
    -show_entries format=duration \
    -of default=noprint_wrappers=1:nokey=1 \
    "$1") || exit $?

if [ -z "$duration" ]; then
    echo "mp4info: no duration reported for $1" >&2
    exit 1
fi

# ffprobe outputs float seconds; m4b-tool expects an integer
printf '%s\n' "$duration" | awk '{ printf "%d\n", ($1 + 0.5) }'
Loading…
Cancel
Save