# ============================================================
# File: scraper/progress.py
# Purpose: Track chapter counters for WebGUI progress +
#          Book State Model (Redis-backed).
# ============================================================

import os
import time

import redis

REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
# decode_responses=True makes every read return `str` instead of `bytes`.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)


def _state_key(book_id: str) -> str:
    """Return the Redis hash key holding the Book State Model of *book_id*."""
    return f"book:{book_id}:state"


# ============================================================
# LEGACY PROGRESS FUNCTIONS (KEPT UNTOUCHED)
# ============================================================


# ------------------------------------------------------------
# SET TOTAL
# ------------------------------------------------------------
def set_total(book_id: str, total: int) -> None:
    """Store the legacy total chapter count under `progress:<book_id>:total`."""
    r.set(f"progress:{book_id}:total", total)


# ------------------------------------------------------------
# COUNTERS legacy
# ------------------------------------------------------------
def inc_completed(book_id: str) -> None:
    """Increment the legacy `completed` counter by one."""
    r.incr(f"progress:{book_id}:completed")


def inc_skipped(book_id: str) -> None:
    """Increment the legacy `skipped` counter by one."""
    r.incr(f"progress:{book_id}:skipped")


def inc_failed(book_id: str) -> None:
    """Increment the legacy `failed` counter by one."""
    r.incr(f"progress:{book_id}:failed")


# ------------------------------------------------------------
# FAILED CHAPTER LIST
# ------------------------------------------------------------
def add_failed_chapter(book_id: str, chapter: int, reason: str) -> None:
    """Append a `Chapter <n>: <reason>` entry to the book's failed list."""
    entry = f"Chapter {chapter}: {reason}"
    r.rpush(f"progress:{book_id}:failed_list", entry)


def get_failed_list(book_id: str) -> list:
    """Return every entry of the book's failed list, in insertion order."""
    return r.lrange(f"progress:{book_id}:failed_list", 0, -1)


# ------------------------------------------------------------
# READ STRUCT FOR UI (legacy view)
# ------------------------------------------------------------
def get_progress(book_id: str) -> dict:
    """Return the legacy progress view of *book_id* as a plain dict.

    Missing counters are reported as 0. `abort` is True when the key
    `abort:<book_id>` exists in Redis.
    """
    total = int(r.get(f"progress:{book_id}:total") or 0)
    completed = int(r.get(f"progress:{book_id}:completed") or 0)
    skipped = int(r.get(f"progress:{book_id}:skipped") or 0)
    failed = int(r.get(f"progress:{book_id}:failed") or 0)
    abort = r.exists(f"abort:{book_id}") == 1
    failed_list = get_failed_list(book_id)

    return {
        "book_id": book_id,
        "total": total,
        "completed": completed,
        "skipped": skipped,
        "failed": failed,
        "failed_list": failed_list,
        "abort": abort,
    }


# ============================================================
# BOOK STATE MODEL (NEW FUNCTIONS - NO BREAKING CHANGES)
# ============================================================


# ------------------------------------------------------------
# Initialize book state at start of scrape
# ------------------------------------------------------------
def init_book_state(
    book_id: str, title: str = "", url: str = "", chapters_total: int = 0
) -> None:
    """Write the initial state hash for *book_id* and register the book.

    Every counter that the increment helpers in this module touch is seeded
    with 0, so a freshly initialised book exposes the same fields that
    `get_state` converts to integers. The status is set to "scraping".

    Args:
        book_id: Identifier used in all Redis keys of the book.
        title: Book title; falsy values are stored as "".
        url: Source URL; falsy values are stored as "".
        chapters_total: Number of chapters expected for the book.
    """
    now = int(time.time())

    r.hset(
        _state_key(book_id),
        mapping={
            "book_id": book_id,
            "title": title or "",
            "url": url or "",
            "status": "scraping",
            "chapters_total": chapters_total,
            # NOTE(review): nothing in this module updates `chapters_done`;
            # it is kept in case external readers expect the field.
            # Confirm and drop if unused.
            "chapters_done": 0,
            # Field actually incremented by inc_chapter_done().
            "chapters_download_done": 0,
            "chapters_download_skipped": 0,
            "audio_total": 0,
            "audio_done": 0,
            # Field incremented by inc_audio_skipped().
            "audio_skipped": 0,
            "last_update": now,
        },
    )

    # Track in library list
    r.sadd("books", book_id)


# ------------------------------------------------------------
# Status + timestamps
# ------------------------------------------------------------
def set_status(book_id: str, status: str) -> None:
    """Set the book's `status` and refresh `last_update` in one write."""
    # A single HSET keeps status and timestamp consistent with each other
    # and saves one round trip.
    r.hset(
        _state_key(book_id),
        mapping={"status": status, "last_update": int(time.time())},
    )


def set_last_update(book_id: str) -> None:
    """Store the current Unix time (whole seconds) as `last_update`."""
    r.hset(_state_key(book_id), "last_update", int(time.time()))


# ------------------------------------------------------------
# Chapter counters new model
# ------------------------------------------------------------
def set_chapter_total(book_id: str, total: int) -> None:
    """Set `chapters_total` and refresh `last_update`."""
    r.hset(_state_key(book_id), "chapters_total", total)
    set_last_update(book_id)


def inc_chapter_download_skipped(book_id: str) -> None:
    """Increment `chapters_download_skipped` by one and refresh `last_update`."""
    r.hincrby(_state_key(book_id), "chapters_download_skipped", 1)
    set_last_update(book_id)


def inc_chapter_done(book_id: str) -> None:
    """Increment `chapters_download_done` by one and refresh `last_update`."""
    r.hincrby(_state_key(book_id), "chapters_download_done", 1)
    set_last_update(book_id)


# ------------------------------------------------------------
# Audio counters
# ------------------------------------------------------------
def set_audio_total(book_id: str, total: int) -> None:
    """Set `audio_total` and refresh `last_update`."""
    r.hset(_state_key(book_id), "audio_total", total)
    set_last_update(book_id)


def inc_audio_done(book_id: str) -> None:
    """Increment `audio_done` by one and refresh `last_update`."""
    r.hincrby(_state_key(book_id), "audio_done", 1)
    set_last_update(book_id)


def inc_audio_skipped(book_id: str) -> None:
    """Increment `audio_skipped` by one and refresh `last_update`."""
    r.hincrby(_state_key(book_id), "audio_skipped", 1)
    set_last_update(book_id)


# ------------------------------------------------------------
# Skip reasons
# ------------------------------------------------------------
def save_skip_reason(book_id: str, chapter: int, reason: str) -> None:
    """Store an explicit skip reason for *chapter*, for transparency in the UI.

    The reason is written to the hash `book:<book_id>:skip_reasons` under the
    chapter number; a later call for the same chapter overwrites it.
    """
    r.hset(f"book:{book_id}:skip_reasons", chapter, reason)
    set_last_update(book_id)


# ------------------------------------------------------------
# Full state readout
# ------------------------------------------------------------
def get_state(book_id: str) -> dict:
    """Read the Book State Model plus legacy progress, merged but not mixed.

    Returns:
        The fields of the state hash, with the known counter fields converted
        to `int` where they parse, plus two extra keys: `skip_reasons` (the
        skip-reason hash) and `legacy_progress` (the result of
        `get_progress`). Counter fields absent from the hash stay absent.
    """
    state = r.hgetall(_state_key(book_id)) or {}

    # Numeric conversions
    numeric_fields = (
        "chapters_total",
        "chapters_download_done",
        "chapters_download_skipped",
        "audio_total",
        "audio_skipped",
        "audio_done",
    )

    for field in numeric_fields:
        if field in state:
            try:
                state[field] = int(state[field])
            except ValueError:
                # Best effort: leave an unparsable value as the raw string.
                pass

    # Skip reasons
    state["skip_reasons"] = r.hgetall(f"book:{book_id}:skip_reasons") or {}

    # Attach legacy progress separately
    state["legacy_progress"] = get_progress(book_id)

    return state