parent
516bca6de5
commit
3a7cc7687c
@ -0,0 +1,135 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/status_check_service.py
|
||||
# Purpose:
|
||||
# Handmatige, idempotente statuscheck per boek.
|
||||
#
|
||||
# Bepaalt op basis van het filesystem:
|
||||
# - aantal gedownloade chapters (.txt)
|
||||
# - aantal gegenereerde audiofiles (.m4b)
|
||||
#
|
||||
# En schrijft deze gevalideerde werkelijkheid naar SQL.
|
||||
#
|
||||
# LET OP:
|
||||
# - Geen Redis
|
||||
# - Geen Celery
|
||||
# - Geen status-transities
|
||||
# - Geen pipeline-logica
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.state_sql import sql_fetch_book, sql_update_book
|
||||
|
||||
|
||||
class StatusCheckService:
    """
    Filesystem-based status check for a single book.

    The files on disk are treated as the single source of truth: the
    counts derived from them are written back to the book's SQL row.
    """

    @staticmethod
    def _count_by_suffix(directory: str, suffix: str) -> int:
        """Count entries in *directory* whose name ends with *suffix* (case-insensitive)."""
        return sum(
            1 for name in os.listdir(directory) if name.lower().endswith(suffix)
        )

    @staticmethod
    def _scan_book_dir(book_dir: str) -> Dict[str, int]:
        """
        Walk the ``volume_*`` sub-directories of *book_dir* and tally their content.

        Returns a dict with the keys ``volumes``, ``chapters_txt`` and
        ``audio_files``.
        """
        counts = {"volumes": 0, "chapters_txt": 0, "audio_files": 0}

        for entry in os.listdir(book_dir):
            if not entry.lower().startswith("volume_"):
                continue

            volume_path = os.path.join(book_dir, entry)

            # Only real directories are volumes; a stray file that happens
            # to be named "volume_*" must not inflate the volume count.
            if not os.path.isdir(volume_path):
                continue

            counts["volumes"] += 1

            # Chapter text files live directly inside the volume directory.
            counts["chapters_txt"] += StatusCheckService._count_by_suffix(
                volume_path, ".txt"
            )

            # Audio files live in an optional "Audio" sub-directory.
            audio_dir = os.path.join(volume_path, "Audio")
            if os.path.isdir(audio_dir):
                counts["audio_files"] += StatusCheckService._count_by_suffix(
                    audio_dir, ".m4b"
                )

        return counts

    @staticmethod
    @logcall
    def run(book_idx: str) -> Dict[str, Any]:
        """
        Run the status check for one book.

        Counts the downloaded chapters (``.txt``) and generated audio files
        (``.m4b``) below ``<BOOKSCRAPER_OUTPUT_DIR>/<title>`` and writes the
        counts to the book's SQL row (``downloaded``, ``audio_done``,
        ``last_update``).

        Args:
            book_idx: Identifier passed to ``sql_fetch_book`` /
                ``sql_update_book``.

        Returns:
            An inspectable dict containing the filesystem counts and the
            SQL row as fetched before and after the update.

        Raises:
            ValueError: If the book is not found in SQL, or its row has no
                usable title to derive the output directory from.
        """

        # ----------------------------------------------------
        # 1. SQL fetch (does the book exist?)
        # ----------------------------------------------------
        sql_before = sql_fetch_book(book_idx)

        if not sql_before:
            raise ValueError(f"[STATUSCHECK] Book not found in SQL: {book_idx}")

        # ----------------------------------------------------
        # 2. Determine filesystem root
        # ----------------------------------------------------
        output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
        title = sql_before.get("title")

        # Without a title, os.path.join would either raise TypeError (None)
        # or resolve to the output root itself (""), which would make the
        # scan count another location. Fail before touching SQL.
        if not title:
            raise ValueError(
                f"[STATUSCHECK] Book has no title in SQL: {book_idx}"
            )

        book_dir = os.path.join(output_root, title)

        # Evaluate once so the reported "exists" flag always matches the
        # scan that was actually performed.
        exists = os.path.isdir(book_dir)

        # ----------------------------------------------------
        # 3. Scan volumes
        # ----------------------------------------------------
        if exists:
            counts = StatusCheckService._scan_book_dir(book_dir)
        else:
            log(
                f"[STATUSCHECK] No output directory for book_idx={book_idx} (title='{title}')"
            )
            counts = {"volumes": 0, "chapters_txt": 0, "audio_files": 0}

        volumes = counts["volumes"]
        chapters_txt = counts["chapters_txt"]
        audio_files = counts["audio_files"]

        # ----------------------------------------------------
        # 4. SQL update (snapshot)
        # ----------------------------------------------------
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
        # It is kept here because switching to an aware datetime would
        # change the string format written to ``last_update``.
        now = datetime.utcnow().isoformat(timespec="seconds")

        update_fields = {
            "downloaded": chapters_txt,
            "audio_done": audio_files,
            "last_update": now,
        }

        sql_update_book(book_idx, update_fields)

        sql_after = sql_fetch_book(book_idx)

        # ----------------------------------------------------
        # 5. Result for inspect/debug
        # ----------------------------------------------------
        result = {
            "book_idx": book_idx,
            "filesystem": {
                "book_dir": book_dir,
                "exists": exists,
                "volumes": volumes,
                "chapters_txt": chapters_txt,
                "audio_files": audio_files,
            },
            "sql_before": sql_before,
            "sql_after": sql_after,
            "notes": [],
        }

        log(
            f"[STATUSCHECK] book_idx={book_idx} "
            f"chapters={chapters_txt} audio={audio_files}"
        )

        return result
|
||||
Loading…
Reference in new issue