# ============================================================
# File: db/repository.py
# Purpose:
#   Unified façade for BookScraper database state
#   (book_idx-only architecture).
#
# Responsibilities:
#   - Route metadata → SQLite
#   - Route status and chapter totals → Redis (live) + SQLite (snapshot)
#   - Route live counters → Redis (the SQLite counter increments are
#     currently disabled; see the COUNTERS sections below)
#   - Provide a clean API for tasks and Flask UI
# ============================================================

import os
import time

import redis

from scraper.logger_decorators import logcall
from logbus.publisher import log

# ============================================================
# SQL low-level engines (snapshot storage)
# ============================================================
from db.state_sql import (
    sql_fetch_book,
    sql_fetch_all_books,
    sql_set_status,
    sql_set_chapters_total,
    sql_register_book,
    sql_update_book,
    sql_inc_downloaded,
    sql_inc_parsed,
    sql_inc_audio_done,
    sql_inc_audio_skipped,
)

# ============================================================
# REDIS low-level engines (live counters)
# ============================================================
from db.state_redis import (
    redis_set_status,
    redis_set_chapters_total,
    redis_inc_download_done,
    redis_inc_download_skipped,
    redis_inc_parsed_done,
    redis_inc_audio_done,
    redis_inc_audio_skipped,
)

# ============================================================
# Redis setup for legacy progress paths
# ============================================================
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)


# ============================================================
# INTERNAL — LEGACY PROGRESS HELPERS (kept for UI)
# Keys remain: progress:{book_idx}:*
# ============================================================
def _legacy_set_total(book_idx, total):
    """Store *total* under the legacy ``progress:{book_idx}:total`` key."""
    _r.set(f"progress:{book_idx}:total", total)


def _legacy_inc_completed(book_idx):
    """Increment the legacy ``completed`` counter of *book_idx* by one."""
    _r.incr(f"progress:{book_idx}:completed")


def _legacy_inc_skipped(book_idx):
    """Increment the legacy ``skipped`` counter of *book_idx* by one."""
    _r.incr(f"progress:{book_idx}:skipped")


def _legacy_inc_failed(book_idx):
    """Increment the legacy ``failed`` counter of *book_idx* by one."""
    _r.incr(f"progress:{book_idx}:failed")


def _legacy_add_failed_chapter(book_idx, chapter, reason):
    """Append a ``Chapter <chapter>: <reason>`` entry to the failed list."""
    _r.rpush(
        f"progress:{book_idx}:failed_list",
        f"Chapter {chapter}: {reason}",
    )


def _legacy_get_failed_list(book_idx):
    """Return every entry of the legacy failed-chapter list of *book_idx*."""
    return _r.lrange(f"progress:{book_idx}:failed_list", 0, -1)


def _legacy_get_progress(book_idx):
    """Collect the legacy progress keys of *book_idx* into one dict.

    Returns:
        A dict with the keys ``book_idx``, ``total``, ``completed``,
        ``skipped``, ``failed``, ``failed_list`` and ``abort``.
    """

    def _counter(redis_key):
        # Any falsy reply is treated as zero.
        return int(_r.get(redis_key) or 0)

    total_count = _counter(f"progress:{book_idx}:total")
    completed_count = _counter(f"progress:{book_idx}:completed")
    skipped_count = _counter(f"progress:{book_idx}:skipped")
    failed_count = _counter(f"progress:{book_idx}:failed")

    # The abort flag lives outside the progress:* namespace.
    abort_requested = _r.exists(f"abort:{book_idx}") == 1
    failures = _legacy_get_failed_list(book_idx)

    return {
        "book_idx": book_idx,
        "total": total_count,
        "completed": completed_count,
        "skipped": skipped_count,
        "failed": failed_count,
        "failed_list": failures,
        "abort": abort_requested,
    }


# ============================================================
# PUBLIC — PROGRESS API
# ============================================================
@logcall
def get_progress(book_idx):
    """Return the legacy progress dict for *book_idx*."""
    return _legacy_get_progress(book_idx)


@logcall
def add_failed_chapter(book_idx, chapter, reason):
    """Record that *chapter* of *book_idx* failed because of *reason*."""
    _legacy_add_failed_chapter(book_idx, chapter, reason)


@logcall
def get_failed_list(book_idx):
    """Return the recorded failed-chapter entries of *book_idx*."""
    return _legacy_get_failed_list(book_idx)


# ============================================================
# FETCH OPERATIONS (SQLite snapshot)
# ============================================================
@logcall
def fetch_book(book_idx):
    """Return whatever ``sql_fetch_book`` yields for *book_idx*."""
    return sql_fetch_book(book_idx)


@logcall
def fetch_all_books():
    """Return whatever ``sql_fetch_all_books`` yields."""
    return sql_fetch_all_books()


# ============================================================
# INIT-FLOW (SQLite metadata only)
# ============================================================
@logcall
def register_book(
    book_idx,
    title,
    author=None,
    description=None,
    cover_url=None,
    cover_path=None,
    book_url=None,
):
    """Register a new book through ``sql_register_book``.

    The record always starts with ``chapters_total`` set to 0 and
    ``status`` set to ``"registered"``.
    """
    record = {
        "book_idx": book_idx,
        "title": title,
        "author": author,
        "description": description,
        "cover_url": cover_url,
        "cover_path": cover_path,
        "book_url": book_url,
        "chapters_total": 0,
        "status": "registered",
    }

    log(f"[DB] Registering new book_idx={book_idx} title='{title}'")
    sql_register_book(book_idx, record)


# ============================================================
# SCRAPE-FLOW UPDATE
# ============================================================
@logcall
def update_book_after_full_scrape(
    book_idx,
    title=None,
    author=None,
    description=None,
    cover_url=None,
    chapters_total=None,
):
    """Write the metadata found by a full scrape and mark the book active.

    Arguments left at ``None`` are not sent to ``sql_update_book``;
    ``status`` is always sent as ``"active"``.
    """
    candidates = {
        "title": title,
        "author": author,
        "description": description,
        "cover_url": cover_url,
        "chapters_total": chapters_total,
    }
    changes = {
        column: value
        for column, value in candidates.items()
        if value is not None
    }
    changes["status"] = "active"

    log(f"[DB] update metadata for book_idx={book_idx}")
    sql_update_book(book_idx, changes)


# ============================================================
# ACTIVE BOOK LISTS
# ============================================================
@logcall
def get_registered_books():
    """Return all books whose status is not ``"hidden"``."""
    all_books = sql_fetch_all_books()
    excluded = {"hidden"}
    log(f"[DB] Fetched all books for registered filter, total={len(all_books)}")

    visible = []
    for book in all_books:
        if book.get("status") in excluded:
            continue
        visible.append(book)
    return visible


@logcall
def get_active_books():
    """Return all books whose status is neither ``"hidden"`` nor ``"done"``."""
    all_books = sql_fetch_all_books()
    excluded = {"hidden", "done"}
    log(f"[DB] Fetched all books for active filter, total={len(all_books)}")

    visible = []
    for book in all_books:
        if book.get("status") in excluded:
            continue
        visible.append(book)
    return visible


# ============================================================
# STATUS MANAGEMENT
# ============================================================
@logcall
def set_status(book_idx, status):
    """Write *status* for *book_idx* to Redis first, then to SQLite."""
    log(f"[DB] Setting status for {book_idx} to '{status}'")
    for write_status in (redis_set_status, sql_set_status):
        write_status(book_idx, status)


# ============================================================
# CHAPTER TOTALS
# ============================================================
@logcall
def set_chapters_total(book_idx, total):
    """Write the chapter *total* for *book_idx* to Redis, then to SQLite."""
    log(f"[DB] Setting chapter total for {book_idx} to {total}")
    for write_total in (redis_set_chapters_total, sql_set_chapters_total):
        write_total(book_idx, total)
    # Currently disabled: _legacy_set_total(book_idx, total)


# ============================================================
# COUNTERS — DOWNLOAD
# ============================================================
@logcall
def inc_download_done(book_idx, amount=1):
    """Add *amount* to the Redis download-done counter of *book_idx*."""
    log(f"[DB] Incrementing download done for {book_idx} by {amount}")
    redis_inc_download_done(book_idx, amount)
    # Currently disabled: sql_inc_downloaded(book_idx, amount)
    # Currently disabled: _legacy_inc_completed(book_idx)


@logcall
def inc_download_skipped(book_idx, amount=1):
    """Add *amount* to the Redis download-skipped counter of *book_idx*."""
    log(f"[DB] Incrementing download skipped for {book_idx} by {amount}")
    redis_inc_download_skipped(book_idx, amount)
    # Currently disabled: _legacy_inc_skipped(book_idx)


# ============================================================
# COUNTERS — PARSE
# ============================================================
@logcall
def inc_parsed_done(book_idx, amount=1):
    """Add *amount* to the Redis parsed-done counter of *book_idx*."""
    log(f"[DB] Incrementing parsed done for {book_idx} by {amount}")
    redis_inc_parsed_done(book_idx, amount)
    # Currently disabled: sql_inc_parsed(book_idx, amount)


# ============================================================
# COUNTERS — AUDIO
# ============================================================
@logcall
def inc_audio_skipped(book_idx, amount=1):
    """Add *amount* to the Redis audio-skipped counter of *book_idx*."""
    log(f"[DB] Incrementing audio skipped for {book_idx} by {amount}")
    # Currently disabled: sql_inc_audio_skipped(book_idx, amount)
    redis_inc_audio_skipped(book_idx, amount)


@logcall
def inc_audio_done(book_idx, amount=1):
    """Add *amount* to the Redis audio-done counter of *book_idx*."""
    log(f"[DB] Incrementing audio done for {book_idx} by {amount}")
    redis_inc_audio_done(book_idx, amount)
    # Currently disabled: sql_inc_audio_done(book_idx, amount)


# ============================================================
# BACKWARDS COMPATIBILITY SHIMS
# These map the old API (book_id) to the
# new book_idx-only system
# ============================================================
@logcall
def inc_downloaded(book_idx, amount=1):
    """Old-API alias: forward to ``inc_download_done`` and return its result."""
    return inc_download_done(book_idx, amount)


@logcall
def inc_parsed(book_idx, amount=1):
    """Old-API alias: forward to ``inc_parsed_done`` and return its result."""
    return inc_parsed_done(book_idx, amount)


@logcall
def inc_audio_done_legacy(book_idx, amount=1):
    """Old-API alias: forward to ``inc_audio_done`` and return its result."""
    return inc_audio_done(book_idx, amount)


# ============================================================
# READ — DERIVED BOOK STATE
# ============================================================
@logcall
def get_book_state(book_idx):
    """
    Canonical read-model for a single book.

    Responsibilities:
      - Read SQLite snapshot (static metadata)
      - Read Redis live state (counters / status)
      - Compute derived fields (NO UI logic)

    Invariants:
      - downloaded = chapters_download_done + chapters_download_skipped

    Args:
        book_idx: Identifier used for the SQLite lookup and for the
            Redis hash ``book:{book_idx}:state``.

    Returns:
        A new dict holding the SQLite fields, overlaid by the Redis hash
        fields, plus the derived ``downloaded`` counter. Both sources
        fall back to an empty mapping when they yield nothing.
    """

    def _int(value):
        # int() raises TypeError for None (field absent from the hash) and
        # ValueError for malformed text; both count as zero. Anything else
        # is a genuine error and is allowed to propagate.
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    # --- SQLite snapshot ---
    sqlite_row = sql_fetch_book(book_idx) or {}

    # --- Redis live state ---
    key = f"book:{book_idx}:state"
    redis_state = _r.hgetall(key) or {}

    # --- primary counters ---
    chapters_done = _int(redis_state.get("chapters_download_done"))
    chapters_skipped = _int(redis_state.get("chapters_download_skipped"))

    # --- derived counters ---
    downloaded = chapters_done + chapters_skipped

    # --- build canonical state ---
    state = {}

    # 1) start with SQLite snapshot
    state.update(sqlite_row)

    # 2) overlay Redis live fields
    # NOTE(review): the overlaid Redis fields are copied as returned by
    # hgetall (presumably str), not normalised to int — confirm that
    # consumers expect this.
    state.update(redis_state)

    # 3) enforce derived invariants
    state["downloaded"] = downloaded

    return state