diff --git a/bookscraper/scraper/download_controller.py b/bookscraper/scraper/download_controller.py index a75fe47..9a9e978 100644 --- a/bookscraper/scraper/download_controller.py +++ b/bookscraper/scraper/download_controller.py @@ -5,6 +5,7 @@ # and pass book_id for abort/progress/log functionality. # + Download and replicate cover image to all volume folders # + Generate scripts (allinone.txt, makebook, say) +# + Initialize Redis Book State Model (status + counters) # ========================================================= from celery import group @@ -16,6 +17,13 @@ import requests import shutil from scraper.abort import abort_requested # DEBUG allowed +# NEW: Redis State Model (C&U) +from scraper.progress import ( + init_book_state, + set_status, + set_chapter_total, +) + class DownloadController: """ @@ -26,6 +34,7 @@ class DownloadController: - book_id-based abort + progress tracking - cover download + volume replication - script generation (allinone.txt, makebook, say) + - Redis book state initialisation and status updates """ def __init__(self, book_id: str, scrape_result: dict): @@ -66,6 +75,20 @@ class DownloadController: except Exception as e: log(f"[CTRL_DEBUG] abort_requested ERROR: {e}") + # ------------------------------------------------- + # NEW: Initialize Redis Book State Model + # ------------------------------------------------- + try: + init_book_state( + book_id=self.book_id, + title=self.title, + url=self.scrape_result.get("book_url"), + chapters_total=len(self.chapters), + ) + log(f"[CTRL_STATE] init_book_state() completed for {self.title}") + except Exception as e: + log(f"[CTRL_STATE] init_book_state FAILED: {e}") + # --------------------------------------------------------- # Cover Download # --------------------------------------------------------- @@ -144,6 +167,16 @@ class DownloadController: ) log(f"[CTRL] Output root: {self.book_base}") + # ------------------------------------- + # NEW: Redis state update + # 
------------------------------------- + try: + set_status(self.book_id, "downloading") + set_chapter_total(self.book_id, total) + log(f"[CTRL_STATE] Status set to 'downloading' for {self.book_id}") + except Exception as e: + log(f"[CTRL_STATE] set_status/set_chapter_total FAILED: {e}") + # ------------------------------------- # 1) Download cover # ------------------------------------- diff --git a/bookscraper/scraper/progress.py b/bookscraper/scraper/progress.py index 6156c9e..4fd09db 100644 --- a/bookscraper/scraper/progress.py +++ b/bookscraper/scraper/progress.py @@ -1,15 +1,22 @@ # ============================================================ # File: scraper/progress.py -# Purpose: Track chapter counters for WebGUI progress. +# Purpose: Track chapter counters for WebGUI progress + +# Book State Model (Redis-backed). # ============================================================ import os +import time import redis REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0") r = redis.Redis.from_url(REDIS_URL, decode_responses=True) +# ============================================================ +# LEGACY PROGRESS FUNCTIONS (ONAANGEROERD BEHOUDEN) +# ============================================================ + + # ------------------------------------------------------------ # SET TOTAL # ------------------------------------------------------------ @@ -45,7 +52,7 @@ def get_failed_list(book_id: str): # ------------------------------------------------------------ -# READ STRUCT FOR UI +# READ STRUCT FOR UI (legacy view) # ------------------------------------------------------------ def get_progress(book_id: str): total = int(r.get(f"progress:{book_id}:total") or 0) @@ -64,3 +71,118 @@ def get_progress(book_id: str): "failed_list": failed_list, "abort": abort, } + + +# ============================================================ +# BOOK STATE MODEL (NIEUWE FUNCTIES — GEEN BREAKING CHANGES) +# ============================================================ + + +# 
# ------------------------------------------------------------
# Initialize book state at start of scrape
# ------------------------------------------------------------
def _state_key(book_id: str) -> str:
    """Return the Redis hash key that holds the state of *book_id*."""
    return f"book:{book_id}:state"


def _now() -> int:
    """Return the current Unix time in whole seconds."""
    return int(time.time())


def _increment_counter(book_id: str, field: str) -> None:
    """Add 1 to *field* of the state hash and refresh ``last_update``."""
    key = _state_key(book_id)
    # Both commands travel in one pipeline so the counter and its timestamp
    # are sent in a single round trip (redis-py pipelines are transactional
    # by default), instead of two independent calls.
    with r.pipeline() as pipe:
        pipe.hincrby(key, field, 1)
        pipe.hset(key, "last_update", _now())
        pipe.execute()


def init_book_state(
    book_id: str, title: str = "", url: str = "", chapters_total: int = 0
):
    """Write the initial state hash for a book and register the book.

    All fields of ``book:{book_id}:state`` are written in one HSET: the
    status is ``"scraping"``, ``chapters_total`` is taken from the argument
    and the remaining counters start at 0. Existing values of these fields
    are overwritten. Afterwards ``book_id`` is added to the ``books`` set.

    Args:
        book_id: Identifier used to build the Redis key.
        title: Book title; any falsy value is stored as an empty string.
        url: Book URL; any falsy value is stored as an empty string.
        chapters_total: Number of chapters known at initialisation time.
    """
    r.hset(
        _state_key(book_id),
        mapping={
            "book_id": book_id,
            "title": title or "",
            "url": url or "",
            "status": "scraping",
            "chapters_total": chapters_total,
            "chapters_done": 0,
            "audio_total": 0,
            "audio_done": 0,
            "last_update": _now(),
        },
    )

    # Track in library list
    r.sadd("books", book_id)


# ------------------------------------------------------------
# Status + timestamps
# ------------------------------------------------------------
def set_status(book_id: str, status: str):
    """Store *status* and refresh ``last_update`` for the book.

    Both fields are written by a single HSET so a reader never observes the
    new status paired with the previous timestamp.
    """
    r.hset(
        _state_key(book_id),
        mapping={"status": status, "last_update": _now()},
    )


def set_last_update(book_id: str):
    """Set the ``last_update`` field of the book state to the current time."""
    r.hset(_state_key(book_id), "last_update", _now())


# ------------------------------------------------------------
# Chapter counters
# ------------------------------------------------------------
def set_chapter_total(book_id: str, total: int):
    """Store *total* as ``chapters_total`` and refresh ``last_update``."""
    r.hset(
        _state_key(book_id),
        mapping={"chapters_total": total, "last_update": _now()},
    )


def inc_chapter_done(book_id: str):
    """Increment ``chapters_done`` by one and refresh ``last_update``."""
    _increment_counter(book_id, "chapters_done")


# ------------------------------------------------------------
# Audio counters
# ------------------------------------------------------------
def set_audio_total(book_id: str, total: int):
    """Store *total* as ``audio_total`` and refresh ``last_update``."""
    r.hset(
        _state_key(book_id),
        mapping={"audio_total": total, "last_update": _now()},
    )


def inc_audio_done(book_id: str):
    """Increment ``audio_done`` by one and refresh ``last_update``."""
    _increment_counter(book_id, "audio_done")


# ------------------------------------------------------------
# Skip reasons
# ------------------------------------------------------------
def save_skip_reason(book_id: str, chapter: int, reason: str):
    """
    Record why a chapter was skipped so the UI can show it.

    The reason is stored in the hash ``book:{book_id}:skip_reasons`` under
    the chapter number, and the book's ``last_update`` is refreshed.
    """
    skip_key = f"book:{book_id}:skip_reasons"
    r.hset(skip_key, chapter, reason)
    set_last_update(book_id)


# ------------------------------------------------------------
# Full state readout
# ------------------------------------------------------------
def get_state(book_id: str):
    """
    Return the Book State Model of a book together with its legacy progress.

    The result holds the fields of ``book:{book_id}:state`` (counter fields
    converted to ``int`` where possible), the skip reasons under
    ``"skip_reasons"`` and the output of ``get_progress`` under
    ``"legacy_progress"``; the legacy data is attached, not merged.
    """
    snapshot = r.hgetall(f"book:{book_id}:state") or {}

    # Counter fields come back as strings; convert the ones that are present.
    for counter in ("chapters_total", "chapters_done", "audio_total", "audio_done"):
        if counter not in snapshot:
            continue
        try:
            as_number = int(snapshot[counter])
        except ValueError:
            # Leave a non-numeric value exactly as it was read.
            continue
        snapshot[counter] = as_number

    # Skip reasons
    skipped = r.hgetall(f"book:{book_id}:skip_reasons")
    snapshot["skip_reasons"] = skipped or {}

    # Attach legacy progress separately
    snapshot["legacy_progress"] = get_progress(book_id)

    return snapshot