from flask import render_template
from scraper.services.status_check_service import StatusCheckService
from logbus.publisher import log

from db.repository import get_book_state


# The rule must declare the <book_idx> variable: the view function takes
# book_idx as a required argument, and Flask only passes arguments that
# appear as variables in the URL rule.
@app.route("/inspect/statuscheck/<book_idx>", methods=["POST"])
def inspect_statuscheck(book_idx):
    """Run the manual status check for one book and render the inspect page.

    Args:
        book_idx: Book identifier taken from the URL path.

    Returns:
        The rendered "inspect/statuscheck_result.html" template. On success
        it receives the status-check result and the merged repository state.
        If anything raises (including the template rendering itself), the
        error is logged and the same template is rendered with the error
        text and the book index, together with HTTP status 500.
    """
    try:
        statuscheck_result = StatusCheckService.run(book_idx)

        # Read the repository view AFTER the status check so the page shows
        # the state as it is once the check has written its snapshot.
        repo_state = get_book_state(book_idx)

        return render_template(
            "inspect/statuscheck_result.html",
            result=statuscheck_result,
            repo_state=repo_state,
        )

    except Exception as e:
        # Top-level request boundary: report the failure on the inspect page
        # instead of letting it surface as a generic server error.
        log(f"[STATUSCHECK] ERROR book_idx={book_idx}: {e}")
        return (
            render_template(
                "inspect/statuscheck_result.html",
                error=str(e),
                book_idx=book_idx,
            ),
            500,
        )
# ============================================================
# READ — DERIVED BOOK STATE
# ============================================================
@logcall
def get_book_state(book_idx):
    """Return the merged, read-only state of a single book.

    The SQL snapshot (``sql_fetch_book``) is combined with the live counters
    stored in the Redis hash ``book:{book_idx}:state``. This function itself
    only reads from both sources.

    Merge rules:
        - ``downloaded`` is the larger of the SQL ``downloaded`` value and
          the Redis sum ``chapters_download_done + chapters_download_skipped``.
        - ``audio_done`` is the larger of the SQL and Redis ``audio_done``.
        - Both are capped at ``chapters_total`` when that total is positive.

    Args:
        book_idx: Book identifier used for the SQL lookup and the Redis key.

    Returns:
        A new dict holding every field of the SQL row, with ``downloaded``,
        ``audio_done`` and ``chapters_total`` replaced by the merged integer
        values. Redis-only fields are not copied into the result.
    """

    def _as_int(raw):
        # Missing or unparsable values count as zero.
        try:
            return int(raw)
        except Exception:
            return 0

    def _merge(sql_value, redis_value, limit):
        # Take the furthest progress seen; never report more than the total
        # when the total is known (> 0).
        best = max(sql_value, redis_value)
        return min(best, limit) if limit > 0 else best

    # --- sources ---
    snapshot = sql_fetch_book(book_idx) or {}
    live = _r.hgetall(f"book:{book_idx}:state") or {}

    # --- SQL snapshot counters ---
    total = _as_int(snapshot.get("chapters_total"))
    downloaded_sql = _as_int(snapshot.get("downloaded"))
    audio_sql = _as_int(snapshot.get("audio_done"))

    # --- Redis live counters ---
    done_live = _as_int(live.get("chapters_download_done"))
    skipped_live = _as_int(live.get("chapters_download_skipped"))
    downloaded_live = done_live + skipped_live
    audio_live = _as_int(live.get("audio_done"))

    # --- merged result: SQL row as the base, merged counters on top ---
    merged = dict(snapshot)
    merged["downloaded"] = _merge(downloaded_sql, downloaded_live, total)
    merged["audio_done"] = _merge(audio_sql, audio_live, total)
    merged["chapters_total"] = total

    return merged
# ============================================================
# File: scraper/services/status_check_service.py
# Purpose:
#   Manual, idempotent status check per book.
#
#   Determines from the filesystem:
#     - number of downloaded chapters (.txt)
#     - number of generated audio files (.m4b)
#
#   and writes this validated reality to SQL.
#
# NOTE:
#   - No Redis
#   - No Celery
#   - No status transitions
#   - No pipeline logic
# ============================================================

import os
from datetime import datetime, timezone
from typing import Dict, Any, Tuple

from logbus.publisher import log
from scraper.logger_decorators import logcall

from db.state_sql import sql_fetch_book, sql_update_book


class StatusCheckService:
    """
    Status check based on the filesystem.
    Single source of truth = disk.
    """

    @staticmethod
    def _scan_book_dir(book_dir: str) -> Tuple[int, int, int]:
        """Count volumes, chapter text files and audio files under book_dir.

        Only sub-directories whose name starts with "volume_" (case
        insensitive) are treated as volumes. Inside each volume, entries
        ending in ".txt" count as chapters and entries ending in ".m4b"
        inside an "Audio" sub-directory count as audio files.

        Returns:
            Tuple ``(volumes, chapters_txt, audio_files)``.
        """
        volumes = 0
        chapters_txt = 0
        audio_files = 0

        for entry in os.listdir(book_dir):
            if not entry.lower().startswith("volume_"):
                continue

            volume_path = os.path.join(book_dir, entry)

            # Check the type BEFORE counting: a stray regular file that
            # happens to be named "volume_*" is not a volume.
            if not os.path.isdir(volume_path):
                continue

            volumes += 1

            # ---- TXT chapters ----
            chapters_txt += sum(
                1
                for fname in os.listdir(volume_path)
                if fname.lower().endswith(".txt")
            )

            # ---- Audio ----
            audio_dir = os.path.join(volume_path, "Audio")
            if os.path.isdir(audio_dir):
                audio_files += sum(
                    1
                    for fname in os.listdir(audio_dir)
                    if fname.lower().endswith(".m4b")
                )

        return volumes, chapters_txt, audio_files

    @staticmethod
    @logcall
    def run(book_idx: str) -> Dict[str, Any]:
        """
        Run the status check for one book.

        Counts the files on disk, writes the counts to SQL as the fields
        "downloaded", "audio_done" and "last_update", and returns an
        inspectable dict with:
        - the filesystem counts
        - the SQL snapshot before / after the update

        Raises:
            ValueError: if the book is not found in SQL, or if its SQL row
                has no title (the title is the name of the book directory).
        """

        # ----------------------------------------------------
        # 1. SQL fetch (does the book exist?)
        # ----------------------------------------------------
        sql_before = sql_fetch_book(book_idx)

        if not sql_before:
            raise ValueError(f"[STATUSCHECK] Book not found in SQL: {book_idx}")

        # ----------------------------------------------------
        # 2. Determine filesystem root
        # ----------------------------------------------------
        output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
        title = sql_before.get("title")

        # Without a title there is no book directory to inspect. A None
        # title would crash os.path.join, and an empty one would make the
        # output root itself look like the book directory.
        if not title:
            raise ValueError(f"[STATUSCHECK] Book has no title in SQL: {book_idx}")

        book_dir = os.path.join(output_root, title)
        book_dir_exists = os.path.isdir(book_dir)

        # ----------------------------------------------------
        # 3. Scan volumes
        # ----------------------------------------------------
        if book_dir_exists:
            volumes, chapters_txt, audio_files = StatusCheckService._scan_book_dir(
                book_dir
            )
        else:
            log(
                f"[STATUSCHECK] No output directory for book_idx={book_idx} : title='{title}')"
            )
            # A missing directory is recorded as "nothing on disk".
            volumes, chapters_txt, audio_files = 0, 0, 0

        # ----------------------------------------------------
        # 4. SQL update (snapshot)
        # ----------------------------------------------------
        # Naive UTC timestamp, same text format as the deprecated
        # datetime.utcnow().isoformat(timespec="seconds").
        now = (
            datetime.now(timezone.utc)
            .replace(tzinfo=None)
            .isoformat(timespec="seconds")
        )

        update_fields = {
            "downloaded": chapters_txt,
            "audio_done": audio_files,
            "last_update": now,
        }

        sql_update_book(book_idx, update_fields)

        sql_after = sql_fetch_book(book_idx)

        # ----------------------------------------------------
        # 5. Result for inspect/debug
        # ----------------------------------------------------
        result = {
            "book_idx": book_idx,
            "filesystem": {
                "book_dir": book_dir,
                "exists": book_dir_exists,
                "volumes": volumes,
                "chapters_txt": chapters_txt,
                "audio_files": audio_files,
            },
            "sql_before": sql_before,
            "sql_after": sql_after,
            "notes": [],
        }

        log(
            f"[STATUSCHECK] book_idx={book_idx} "
            f"chapters={chapters_txt} audio={audio_files}"
        )

        return result
+ +
diff --git a/bookscraper/templates/inspect/statuscheck_result.html b/bookscraper/templates/inspect/statuscheck_result.html new file mode 100644 index 0000000..ee9a2c4 --- /dev/null +++ b/bookscraper/templates/inspect/statuscheck_result.html @@ -0,0 +1,115 @@ +{% extends "layout.html" %} {% block content %} + +

Statuscheck – Inspect

+ +{% if error %} +
Fout: {{ error }}
+{% else %} + + + + +

Boek

+ + + + + + + + + + + + + +
Book idx{{ result.book_idx }}
Pad{{ result.filesystem.book_dir }}
Bestaat{{ result.filesystem.exists }}
+ + + + +

Filesystem (source of truth)

+ + + + + + + + + + + + + +
Volumes{{ result.filesystem.volumes }}
Chapters (.txt){{ result.filesystem.chapters_txt }}
Audio (.m4b){{ result.filesystem.audio_files }}
+ + + + +

SQL snapshot

+ +

Voor

+ + + + + + + + + + + + + +
Downloaded{{ result.sql_before.downloaded }}
Audio done{{ result.sql_before.audio_done }}
Status{{ result.sql_before.status }}
+ +

Na

+ + + + + + + + + + + + + +
Downloaded{{ result.sql_after.downloaded }}
Audio done{{ result.sql_after.audio_done }}
Last update{{ result.sql_after.last_update }}
+ + + + +

Repository merged state (UI input)

+ + + + + + + + + + + + + + +
Downloaded{{ repo_state.downloaded }}
Audio done{{ repo_state.audio_done }}
Chapters total{{ repo_state.chapters_total }}
+ +
+ Raw repository state +
{{ repo_state | tojson(indent=2) }}
+
+ +{% endif %} + +
+ +← Terug naar dashboard + +{% endblock %}