# ============================================================
# File: db/repository.py
# Purpose:
#   Unified façade for BookScraper database state.
#
# Responsibilities:
#   - Route metadata → SQLite
#   - Route counters → Redis (live) + SQLite (snapshot)
#   - Provide a clean API for tasks and Flask UI
# ============================================================
# ============================================================
# UPDATED — canonical read model via get_book_state
# ============================================================

import os

import redis

from scraper.logger_decorators import logcall

# NOTE(review): `log` is not referenced anywhere in this module; the import is
# kept in case other code imports it from here — confirm before removing.
from logbus.publisher import log

# ============================================================
# SQL low-level engines (snapshot storage)
# ============================================================
from db.state_sql import (
    sql_fetch_book,
    sql_fetch_all_books,
    sql_set_status,
    sql_set_chapters_total,
    sql_register_book,
    sql_update_book,
)

# ============================================================
# REDIS low-level engines (live counters)
# ============================================================
from db.state_redis import (
    redis_set_status,
    redis_set_chapters_total,
    redis_inc_download_done,
    redis_inc_download_skipped,
    redis_inc_parsed_done,
    redis_inc_audio_done,
    redis_inc_audio_skipped,
)

# ============================================================
# Redis client (read-only for legacy + guards)
# ============================================================
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Statuses excluded from the UI book lists below.
_REGISTERED_HIDDEN_STATES = frozenset({"hidden"})
_ACTIVE_HIDDEN_STATES = frozenset({"hidden", "done"})


def _to_int(value) -> int:
    """Coerce *value* to ``int``, returning 0 when it cannot be converted.

    Covers missing values (``None``), non-numeric strings, and non-finite
    floats. Only conversion errors are swallowed, so unrelated failures are
    not hidden.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def _books_excluding(hidden_states):
    """Return all books from the SQL store whose status is not in *hidden_states*."""
    return [
        book
        for book in sql_fetch_all_books()
        if book.get("status") not in hidden_states
    ]


# ============================================================
# LEGACY PROGRESS (UI only, unchanged)
# ============================================================
def _legacy_get_progress(book_idx):
    """Read the legacy ``progress:<book_idx>:*`` keys straight from Redis.

    Returns a dict with the integer counters ``total``, ``completed``,
    ``skipped`` and ``failed`` (0 when a key is missing or empty), an
    ``abort`` flag that is True when the ``abort:<book_idx>`` key exists,
    and ``failed_list`` holding the full Redis list of failed entries.
    """
    prefix = f"progress:{book_idx}"

    def _counter(name):
        # A missing key yields None, which falls back to 0.
        return int(_r.get(f"{prefix}:{name}") or 0)

    return {
        "book_idx": book_idx,
        "total": _counter("total"),
        "completed": _counter("completed"),
        "skipped": _counter("skipped"),
        "failed": _counter("failed"),
        "abort": _r.exists(f"abort:{book_idx}") == 1,
        "failed_list": _r.lrange(f"{prefix}:failed_list", 0, -1),
    }


@logcall
def get_progress(book_idx):
    """Return the legacy progress dict for *book_idx* (see ``_legacy_get_progress``)."""
    return _legacy_get_progress(book_idx)


# ============================================================
# FETCH (SQLite snapshot)
# ============================================================
@logcall
def fetch_book(book_idx):
    """Return the stored snapshot for one book, as given by ``sql_fetch_book``."""
    return sql_fetch_book(book_idx)


@logcall
def fetch_all_books():
    """Return the stored snapshots of all books, as given by ``sql_fetch_all_books``."""
    return sql_fetch_all_books()


# ============================================================
# INIT / UPDATE METADATA
# ============================================================
@logcall
def register_book(
    book_idx,
    title,
    author=None,
    description=None,
    cover_url=None,
    cover_path=None,
    book_url=None,
):
    """Register a new book via ``sql_register_book``.

    The record starts with ``chapters_total`` 0 and status ``"registered"``;
    optional metadata that is not supplied is stored as ``None``.
    """
    record = {
        "book_idx": book_idx,
        "title": title,
        "author": author,
        "description": description,
        "cover_url": cover_url,
        "cover_path": cover_path,
        "book_url": book_url,
        "chapters_total": 0,
        "status": "registered",
    }
    sql_register_book(book_idx, record)


@logcall
def update_book_after_full_scrape(
    book_idx,
    title=None,
    author=None,
    description=None,
    cover_url=None,
    chapters_total=None,
):
    """Update a book's metadata after a full scrape and mark it ``"active"``.

    Only arguments that are not ``None`` are sent to ``sql_update_book``;
    the status is always set to ``"active"``.
    """
    candidates = {
        "title": title,
        "author": author,
        "description": description,
        "cover_url": cover_url,
        "chapters_total": chapters_total,
    }
    # `is not None` (not truthiness) so that e.g. chapters_total=0 or an
    # empty description is still written.
    fields = {key: value for key, value in candidates.items() if value is not None}
    fields["status"] = "active"
    sql_update_book(book_idx, fields)


# ============================================================
# STATUS
# ============================================================
@logcall
def set_status(book_idx, status):
    """Write *status* to both stores: Redis first, then the SQL snapshot."""
    redis_set_status(book_idx, status)
    sql_set_status(book_idx, status)


# ============================================================
# TOTALS
# ============================================================
@logcall
def set_chapters_total(book_idx, total):
    """Write the chapter total to both stores: Redis first, then the SQL snapshot."""
    redis_set_chapters_total(book_idx, total)
    sql_set_chapters_total(book_idx, total)


# ============================================================
# COUNTERS — WRITE ONLY
# ============================================================
@logcall
def inc_download_done(book_idx, amount=1):
    """Increment the live download-done counter by *amount* (Redis only)."""
    redis_inc_download_done(book_idx, amount)


@logcall
def inc_download_skipped(book_idx, amount=1):
    """Increment the live download-skipped counter by *amount* (Redis only)."""
    redis_inc_download_skipped(book_idx, amount)


@logcall
def inc_parsed_done(book_idx, amount=1):
    """Increment the live parsed-done counter by *amount* (Redis only)."""
    redis_inc_parsed_done(book_idx, amount)


@logcall
def inc_audio_done(book_idx, amount=1):
    """Increment the live audio-done counter by *amount* (Redis only)."""
    redis_inc_audio_done(book_idx, amount)


@logcall
def inc_audio_skipped(book_idx, amount=1):
    """Increment the live audio-skipped counter by *amount* (Redis only)."""
    redis_inc_audio_skipped(book_idx, amount)


# ============================================================
# CANONICAL READ MODEL
# ============================================================
@logcall
def get_book_state(book_idx) -> dict:
    """
    Canonical merged read model.

    Rules:
      - SQL   = snapshot baseline
      - Redis = live counters (hash ``book:<book_idx>:state``)
      - merged = max(sql, redis)
      - capped at chapters_total (only when chapters_total > 0)

    Returns a copy of the SQL row with ``downloaded``, ``audio_done``,
    ``audio_skipped`` and ``chapters_total`` replaced by the merged integer
    values and ``status`` replaced by the derived status. When the book has
    no SQL row, the result contains only those keys.
    """
    sqlite_row = sql_fetch_book(book_idx) or {}
    redis_state = _r.hgetall(f"book:{book_idx}:state") or {}

    chapters_total = _to_int(sqlite_row.get("chapters_total"))

    # Live "downloaded" counts both completed and skipped downloads.
    redis_downloaded = _to_int(redis_state.get("chapters_download_done")) + _to_int(
        redis_state.get("chapters_download_skipped")
    )

    # Merge: whichever store has seen more progress wins.
    merged_downloaded = max(_to_int(sqlite_row.get("downloaded")), redis_downloaded)
    merged_audio_done = max(
        _to_int(sqlite_row.get("audio_done")),
        _to_int(redis_state.get("audio_done")),
    )
    merged_audio_skipped = max(
        _to_int(sqlite_row.get("audio_skipped")),
        _to_int(redis_state.get("audio_skipped")),
    )

    if chapters_total > 0:
        merged_downloaded = min(merged_downloaded, chapters_total)
        merged_audio_done = min(merged_audio_done, chapters_total)
        merged_audio_skipped = min(merged_audio_skipped, chapters_total)

    # Each audio counter is capped on its own, so their sum may still exceed
    # chapters_total; the status check below treats that as complete.
    audio_completed = merged_audio_done + merged_audio_skipped

    # Build state
    state = dict(sqlite_row)
    state.update(
        {
            "downloaded": merged_downloaded,
            "audio_done": merged_audio_done,
            "audio_skipped": merged_audio_skipped,
            "chapters_total": chapters_total,
        }
    )

    # Derived status: the stored status is only kept while the chapter total
    # is unknown (<= 0).
    status = sqlite_row.get("status") or "unknown"
    if chapters_total > 0:
        if merged_downloaded < chapters_total:
            status = "downloading"
        # Past this point merged_downloaded == chapters_total (it is capped).
        elif audio_completed < chapters_total:
            status = "audio"
        else:
            status = "done"
    state["status"] = status

    return state


# ============================================================
# READ HELPERS (VIA get_book_state ONLY)
# ============================================================
@logcall
def get_chapters_total(book_idx) -> int:
    """Return ``chapters_total`` from the merged book state."""
    return int(get_book_state(book_idx).get("chapters_total", 0))


@logcall
def get_audio_done(book_idx) -> int:
    """Return ``audio_done`` from the merged book state."""
    return int(get_book_state(book_idx).get("audio_done", 0))


@logcall
def get_audio_completed_total(book_idx) -> int:
    """Return ``audio_done + audio_skipped`` from the merged book state."""
    state = get_book_state(book_idx)
    return int(state.get("audio_done", 0)) + int(state.get("audio_skipped", 0))


# ============================================================
# STATUSCHECK GUARD (INTENTIONAL DIRECT REDIS)
# ============================================================
@logcall
def try_trigger_statuscheck(book_idx) -> bool:
    """Claim the one-shot statuscheck flag for *book_idx*.

    Uses ``SET ... NX`` on ``book:<book_idx>:statuscheck:triggered``.
    Returns True only for the call that creates the key and False when it
    already exists. No expiry is set here, so the flag persists until the
    key is removed elsewhere.
    """
    return bool(_r.set(f"book:{book_idx}:statuscheck:triggered", "1", nx=True))


# ============================================================
# ACTIVE / REGISTERED BOOK LISTS (UI API)
# ============================================================
@logcall
def get_registered_books():
    """
    Books visible in the 'registered' list in the UI.

    Returns every stored book whose status is not "hidden".
    """
    return _books_excluding(_REGISTERED_HIDDEN_STATES)


@logcall
def get_active_books():
    """
    Books currently active in the dashboard.

    Returns every stored book whose status is neither "hidden" nor "done".
    """
    return _books_excluding(_ACTIVE_HIDDEN_STATES)


@logcall
def store_m4b_error(book_idx: str, volume: str, error_text: str):
    """
    Passive storage of m4b errors.

    Appends ``"<volume>: <error_text>"`` to the Redis list
    ``book:<book_idx>:m4b:errors``.
    No logic, no retries, no state transitions.
    """
    key = f"book:{book_idx}:m4b:errors"
    entry = f"{volume}: {error_text}"
    _r.rpush(key, entry)