inspect state accepted

feature/pipeline-finalization
peter.fong 3 days ago
parent 516bca6de5
commit 3a7cc7687c

@ -349,6 +349,39 @@ def logs():
return jsonify({"lines": new_lines, "last": new_last})
from flask import render_template
from scraper.services.status_check_service import StatusCheckService
from logbus.publisher import log
from db.repository import get_book_state
@app.route("/inspect/statuscheck/<book_idx>", methods=["POST"])
def inspect_statuscheck(book_idx):
    """
    Run the status check for one book and render the inspect page.

    On success the template receives the StatusCheckService result and the
    repository state for the same book. On any failure the error is logged
    and the same template is rendered with the error text, the book index
    and HTTP status 500.
    """
    template_name = "inspect/statuscheck_result.html"

    try:
        # The check runs first; the repository state is read afterwards so
        # the page shows the state as it is once the check has finished.
        check_outcome = StatusCheckService.run(book_idx)
        page = render_template(
            template_name,
            result=check_outcome,
            repo_state=get_book_state(book_idx),
        )
    except Exception as exc:
        log(f"[STATUSCHECK] ERROR book_idx={book_idx}: {exc}")
        error_page = render_template(
            template_name,
            error=str(exc),
            book_idx=book_idx,
        )
        return error_page, 500

    return page
# =====================================================
# SECTION 4 — DEBUG ROUTES
# =====================================================

@ -304,53 +304,74 @@ def inc_audio_done_legacy(book_idx, amount=1):
# ============================================================
# READ — DERIVED BOOK STATE
# ============================================================
@logcall
def get_book_state(book_idx):
    """
    Canonical merged read-model for a single book.

    Behaviour:
    - Reads the SQL snapshot via ``sql_fetch_book``
    - Reads the Redis hash ``book:<book_idx>:state`` (live counters)
    - Computes the merged counters
    - Performs no writes in this function

    Merge rules:
    - redis downloaded = chapters_download_done + chapters_download_skipped
    - merged = max(sql, redis) for ``downloaded`` and ``audio_done``
    - merged values are capped at ``chapters_total`` when that is > 0

    Returns:
        dict: the SQL row overlaid with the raw Redis fields, in which
        ``downloaded``, ``audio_done`` and ``chapters_total`` are replaced
        by the merged integer values.
    """

    def _int(v):
        """Coerce a counter to int; missing or non-numeric values count as 0."""
        try:
            return int(v)
        except (TypeError, ValueError, OverflowError):
            return 0

    # ----------------------------------------------------
    # 1. Fetch sources
    # ----------------------------------------------------
    sqlite_row = sql_fetch_book(book_idx) or {}
    redis_state = _r.hgetall(f"book:{book_idx}:state") or {}

    # ----------------------------------------------------
    # 2. SQL snapshot
    # ----------------------------------------------------
    chapters_total = _int(sqlite_row.get("chapters_total"))
    sql_downloaded = _int(sqlite_row.get("downloaded"))
    sql_audio_done = _int(sqlite_row.get("audio_done"))

    # ----------------------------------------------------
    # 3. Redis live counters
    # ----------------------------------------------------
    redis_downloaded = _int(redis_state.get("chapters_download_done")) + _int(
        redis_state.get("chapters_download_skipped")
    )
    redis_audio_done = _int(redis_state.get("audio_done"))

    # ----------------------------------------------------
    # 4. Merge (SQL vs Redis)
    # ----------------------------------------------------
    merged_downloaded = max(sql_downloaded, redis_downloaded)
    merged_audio_done = max(sql_audio_done, redis_audio_done)

    # Only cap when a total is known; a total of 0 would otherwise zero
    # out both counters.
    if chapters_total > 0:
        merged_downloaded = min(merged_downloaded, chapters_total)
        merged_audio_done = min(merged_audio_done, chapters_total)

    # ----------------------------------------------------
    # 5. Build merged state (read-only)
    # ----------------------------------------------------
    state = {}
    # Base = SQL snapshot
    state.update(sqlite_row)
    # Raw Redis fields are overlaid on top of the SQL row
    state.update(redis_state)
    # The merged conclusions always win over both sources
    state["downloaded"] = merged_downloaded
    state["audio_done"] = merged_audio_done
    state["chapters_total"] = chapters_total
    return state

@ -0,0 +1,135 @@
# ============================================================
# File: scraper/services/status_check_service.py
# Purpose:
# Manual, idempotent status check per book.
#
# Determines from the filesystem:
# - number of downloaded chapters (.txt)
# - number of generated audio files (.m4b)
#
# And writes this validated reality to SQL.
#
# NOTE:
# - No Redis
# - No Celery
# - No status transitions
# - No pipeline logic
# ============================================================
import os
from datetime import datetime
from typing import Dict, Any
from logbus.publisher import log
from scraper.logger_decorators import logcall
from db.state_sql import sql_fetch_book, sql_update_book
class StatusCheckService:
    """
    Filesystem-based status check for a single book.

    The counts found on disk are treated as the source of truth and are
    written to the book's SQL row.
    """

    @staticmethod
    def _count_files(directory: str, suffix: str) -> int:
        """Count entries of ``directory`` whose name ends with ``suffix`` (case-insensitive)."""
        return sum(
            1 for name in os.listdir(directory) if name.lower().endswith(suffix)
        )

    @staticmethod
    def _scan_book_dir(book_dir: str) -> Dict[str, int]:
        """Count volume directories, chapter .txt files and .m4b audio files under ``book_dir``."""
        counts = {"volumes": 0, "chapters_txt": 0, "audio_files": 0}

        for entry in os.listdir(book_dir):
            if not entry.lower().startswith("volume_"):
                continue

            volume_path = os.path.join(book_dir, entry)
            # Only real directories count as volumes; a stray file named
            # "volume_*" must not inflate the volume count.
            if not os.path.isdir(volume_path):
                continue
            counts["volumes"] += 1

            # ---- TXT chapters ----
            counts["chapters_txt"] += StatusCheckService._count_files(
                volume_path, ".txt"
            )

            # ---- Audio ----
            audio_dir = os.path.join(volume_path, "Audio")
            if os.path.isdir(audio_dir):
                counts["audio_files"] += StatusCheckService._count_files(
                    audio_dir, ".m4b"
                )

        return counts

    @staticmethod
    @logcall
    def run(book_idx: str) -> Dict[str, Any]:
        """
        Run the status check for one book.

        Counts chapter and audio files on disk, writes the counts to the
        book's SQL row, and returns an inspectable summary.

        Args:
            book_idx: Identifier passed to ``sql_fetch_book`` and
                ``sql_update_book``.

        Returns:
            Dict with ``book_idx``, ``filesystem`` (directory path, existence
            flag and counts), ``sql_before``, ``sql_after`` and ``notes``.

        Raises:
            ValueError: If the book is not found in SQL or has no title.
        """
        # Local import: the module only imports ``datetime`` from datetime.
        from datetime import timezone

        # ----------------------------------------------------
        # 1. SQL fetch (does the book exist?)
        # ----------------------------------------------------
        sql_before = sql_fetch_book(book_idx)
        if not sql_before:
            raise ValueError(f"[STATUSCHECK] Book not found in SQL: {book_idx}")

        # ----------------------------------------------------
        # 2. Determine the book directory
        # ----------------------------------------------------
        title = sql_before.get("title")
        if not title:
            # Without a title the path would be the output root itself (or
            # os.path.join would fail on None); refuse rather than scan the
            # wrong directory and overwrite the counters.
            raise ValueError(f"[STATUSCHECK] Book has no title in SQL: {book_idx}")

        output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
        book_dir = os.path.join(output_root, title)
        dir_exists = os.path.isdir(book_dir)

        # ----------------------------------------------------
        # 3. Scan volumes
        # ----------------------------------------------------
        if dir_exists:
            counts = StatusCheckService._scan_book_dir(book_dir)
        else:
            log(
                f"[STATUSCHECK] No output directory for book_idx={book_idx} : title='{title}')"
            )
            # A missing directory is recorded as zero chapters / audio files.
            counts = {"volumes": 0, "chapters_txt": 0, "audio_files": 0}

        chapters_txt = counts["chapters_txt"]
        audio_files = counts["audio_files"]

        # ----------------------------------------------------
        # 4. SQL update (snapshot)
        # ----------------------------------------------------
        # Naive UTC timestamp, same string format as the deprecated
        # datetime.utcnow() produced.
        now = (
            datetime.now(timezone.utc)
            .replace(tzinfo=None)
            .isoformat(timespec="seconds")
        )
        update_fields = {
            "downloaded": chapters_txt,
            "audio_done": audio_files,
            "last_update": now,
        }
        sql_update_book(book_idx, update_fields)
        sql_after = sql_fetch_book(book_idx)

        # ----------------------------------------------------
        # 5. Result for inspect/debug
        # ----------------------------------------------------
        result = {
            "book_idx": book_idx,
            "filesystem": {
                "book_dir": book_dir,
                "exists": dir_exists,
                "volumes": counts["volumes"],
                "chapters_txt": chapters_txt,
                "audio_files": audio_files,
            },
            "sql_before": sql_before,
            "sql_after": sql_after,
            "notes": [],
        }
        log(
            f"[STATUSCHECK] book_idx={book_idx} "
            f"chapters={chapters_txt} audio={audio_files}"
        )
        return result

@ -201,7 +201,7 @@
}
/* Spacing below each progress row. */
.progress-row {
  margin-bottom: 2px;
}
.progress-label {
@ -249,3 +249,9 @@
text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);
pointer-events: none;
}
/* Statuscheck button: dark grey background, white text, grey border. */
.statuscheck-btn {
  margin-left: 4px;
  border: 1px solid #666;
  color: #fff;
  background-color: #444;
}

@ -84,9 +84,8 @@
/* Light bordered container, full width, with compact padding. */
.progress-box {
  background: #fafafa;
  border: 1px solid #ddd;
  padding: 8px;
  border-radius: 6px;
  width: 100%;
}
.progress-header h2 {
@ -295,3 +294,19 @@
/* Hover highlight for links inside dropdown menu items. */
.dropdown-menu li a:hover {
background: #f0f0f0;
}
/* Key/value table: collapsed borders, spacing below the table. */
table.kv {
  margin-bottom: 16px;
  border-collapse: collapse;
}

/* Key column: left-aligned, grey, normal weight. */
table.kv th {
  font-weight: normal;
  color: #777;
  padding-right: 12px;
  text-align: left;
}

/* Value column: medium weight. */
table.kv td {
  font-weight: 500;
}

@ -54,6 +54,20 @@ component) ============================================================ #}
<i class="fa-solid fa-stop"></i>
</button>
</form>
{# Statuscheck: POSTs to /inspect/statuscheck/<book_idx> for this book. #}
<form
method="post"
action="/inspect/statuscheck/{{ b.book_idx }}"
style="display: inline-block"
>
<button
type="submit"
class="statuscheck-btn"
title="Herbereken status op basis van bestanden"
>
Statuscheck
</button>
</form>
</div>
</div>

@ -0,0 +1,115 @@
{% extends "layout.html" %} {% block content %}
{#
  Statuscheck inspect page.
  Context variables read by this template:
    error      - when truthy, only the error message is shown
    result     - book_idx, filesystem (book_dir, exists, volumes,
                 chapters_txt, audio_files), sql_before and sql_after
    repo_state - downloaded, audio_done and chapters_total are shown;
                 the whole object is also dumped as JSON
#}
<h2>Statuscheck Inspect</h2>
{% if error %}
<div class="error"><strong>Fout:</strong> {{ error }}</div>
{% else %}
<!-- ===================================================== -->
<!-- BOOK -->
<!-- ===================================================== -->
<h3>Boek</h3>
<table class="kv">
<tr>
<th>Book idx</th>
<td>{{ result.book_idx }}</td>
</tr>
<tr>
<th>Pad</th>
<td>{{ result.filesystem.book_dir }}</td>
</tr>
<tr>
<th>Bestaat</th>
<td>{{ result.filesystem.exists }}</td>
</tr>
</table>
<!-- ===================================================== -->
<!-- FILESYSTEM -->
<!-- ===================================================== -->
<h3>Filesystem (source of truth)</h3>
<table class="kv">
<tr>
<th>Volumes</th>
<td>{{ result.filesystem.volumes }}</td>
</tr>
<tr>
<th>Chapters (.txt)</th>
<td>{{ result.filesystem.chapters_txt }}</td>
</tr>
<tr>
<th>Audio (.m4b)</th>
<td>{{ result.filesystem.audio_files }}</td>
</tr>
</table>
<!-- ===================================================== -->
<!-- SQL -->
<!-- ===================================================== -->
<h3>SQL snapshot</h3>
<h4>Voor</h4>
<table class="kv">
<tr>
<th>Downloaded</th>
<td>{{ result.sql_before.downloaded }}</td>
</tr>
<tr>
<th>Audio done</th>
<td>{{ result.sql_before.audio_done }}</td>
</tr>
<tr>
<th>Status</th>
<td>{{ result.sql_before.status }}</td>
</tr>
</table>
<h4>Na</h4>
<table class="kv">
<tr>
<th>Downloaded</th>
<td>{{ result.sql_after.downloaded }}</td>
</tr>
<tr>
<th>Audio done</th>
<td>{{ result.sql_after.audio_done }}</td>
</tr>
<tr>
<th>Last update</th>
<td>{{ result.sql_after.last_update }}</td>
</tr>
</table>
<!-- ===================================================== -->
<!-- REPOSITORY -->
<!-- ===================================================== -->
<h3>Repository merged state (UI input)</h3>
<table class="kv">
<tr>
<th>Downloaded</th>
<td>{{ repo_state.downloaded }}</td>
</tr>
<tr>
<th>Audio done</th>
<td>{{ repo_state.audio_done }}</td>
</tr>
<tr>
<th>Chapters total</th>
<td>{{ repo_state.chapters_total }}</td>
</tr>
</table>
<details>
<summary>Raw repository state</summary>
<pre>{{ repo_state | tojson(indent=2) }}</pre>
</details>
{% endif %}
<hr />
<a href="/dashboard">← Terug naar dashboard</a>
{% endblock %}
Loading…
Cancel
Save