You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/db/repository.py

391 lines
12 KiB

# ============================================================
# File: db/repository.py
# Purpose:
# Unified façade for BookScraper database state.
#
# Responsibilities:
# - Route metadata → SQLite
# - Route counters → Redis (live) + SQLite (snapshot)
# - Provide a clean API for tasks and Flask UI
# ============================================================
# ============================================================
# File: db/repository.py (UPDATED for book_idx-only architecture)
# ============================================================
from scraper.logger_decorators import logcall
from logbus.publisher import log
import redis
import os
import time
# ============================================================
# SQL low-level engines (snapshot storage)
# ============================================================
from db.state_sql import (
sql_fetch_book,
sql_fetch_all_books,
sql_set_status,
sql_set_chapters_total,
sql_register_book,
sql_update_book,
sql_inc_downloaded,
sql_inc_parsed,
sql_inc_audio_done,
sql_inc_audio_skipped,
)
# ============================================================
# REDIS low-level engines (live counters)
# ============================================================
from db.state_redis import (
redis_set_status,
redis_set_chapters_total,
redis_inc_download_done,
redis_inc_download_skipped,
redis_inc_parsed_done,
redis_inc_audio_done,
redis_inc_audio_skipped,
)
# ============================================================
# Redis setup for legacy progress paths
# ============================================================
# Connection URL comes from the REDIS_BROKER environment variable; the literal
# below is used when that variable is not set.
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
# Module-level Redis client used by the helpers in this file. It is created at
# import time. decode_responses=True asks redis-py to return str replies
# instead of bytes.
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
# ============================================================
# INTERNAL — LEGACY PROGRESS HELPERS (kept for UI)
# Keys remain: progress:{book_idx}:*
# ============================================================
def _legacy_set_total(book_idx, total):
    """Store ``total`` under the legacy key ``progress:{book_idx}:total``."""
    total_key = f"progress:{book_idx}:total"
    _r.set(total_key, total)
def _legacy_inc_completed(book_idx):
    """Increment the legacy counter ``progress:{book_idx}:completed``."""
    completed_key = f"progress:{book_idx}:completed"
    _r.incr(completed_key)
def _legacy_inc_skipped(book_idx):
    """Increment the legacy counter ``progress:{book_idx}:skipped``."""
    skipped_key = f"progress:{book_idx}:skipped"
    _r.incr(skipped_key)
def _legacy_inc_failed(book_idx):
    """Increment the legacy counter ``progress:{book_idx}:failed``."""
    failed_key = f"progress:{book_idx}:failed"
    _r.incr(failed_key)
def _legacy_add_failed_chapter(book_idx, chapter, reason):
    """Append a "Chapter <chapter>: <reason>" line to the legacy failure list.

    The line is pushed onto the tail of the Redis list
    ``progress:{book_idx}:failed_list``.
    """
    list_key = f"progress:{book_idx}:failed_list"
    _r.rpush(list_key, f"Chapter {chapter}: {reason}")
def _legacy_get_failed_list(book_idx):
    """Return every entry of the Redis list ``progress:{book_idx}:failed_list``."""
    list_key = f"progress:{book_idx}:failed_list"
    # The range 0..-1 selects the whole list.
    entries = _r.lrange(list_key, 0, -1)
    return entries
def _legacy_get_progress(book_idx):
    """Collect the legacy progress values of one book into a dict.

    Returns a dict with the keys ``book_idx``, ``total``, ``completed``,
    ``skipped``, ``failed``, ``failed_list`` and ``abort``. The four counters
    are read from ``progress:{book_idx}:<name>``; ``abort`` is True when the
    key ``abort:{book_idx}`` exists.
    """
    prefix = f"progress:{book_idx}"

    def read_counter(name):
        # A falsy reply (for example a missing key) is reported as 0.
        return int(_r.get(f"{prefix}:{name}") or 0)

    # Counters are read in the same order as they appear in the result.
    counters = {
        name: read_counter(name)
        for name in ("total", "completed", "skipped", "failed")
    }
    abort_requested = _r.exists(f"abort:{book_idx}") == 1
    failures = _legacy_get_failed_list(book_idx)

    return {
        "book_idx": book_idx,
        **counters,
        "failed_list": failures,
        "abort": abort_requested,
    }
# ============================================================
# PUBLIC — PROGRESS API
# ============================================================
@logcall
def get_progress(book_idx):
    """Public entry point for the legacy progress dict of ``book_idx``.

    Delegates to ``_legacy_get_progress`` and returns its result unchanged.
    """
    progress = _legacy_get_progress(book_idx)
    return progress
@logcall
def add_failed_chapter(book_idx, chapter, reason):
    """Record a failed chapter for ``book_idx`` in the legacy failure list.

    Delegates to ``_legacy_add_failed_chapter``; nothing is returned.
    """
    _legacy_add_failed_chapter(book_idx, chapter=chapter, reason=reason)
@logcall
def get_failed_list(book_idx):
    """Return the legacy failure list of ``book_idx``.

    Delegates to ``_legacy_get_failed_list`` and returns its result unchanged.
    """
    failures = _legacy_get_failed_list(book_idx)
    return failures
# ============================================================
# FETCH OPERATIONS (SQLite snapshot)
# ============================================================
@logcall
def fetch_book(book_idx):
    """Return whatever ``sql_fetch_book`` yields for ``book_idx``."""
    book = sql_fetch_book(book_idx)
    return book
@logcall
def fetch_all_books():
    """Return whatever ``sql_fetch_all_books`` yields."""
    books = sql_fetch_all_books()
    return books
# ============================================================
# INIT-FLOW (SQLite metadata only)
# ============================================================
@logcall
def register_book(
    book_idx,
    title,
    author=None,
    description=None,
    cover_url=None,
    cover_path=None,
    book_url=None,
):
    """Register a new book through ``sql_register_book``.

    The given metadata is passed on as a field dict. Two fields are fixed for
    every new registration: ``chapters_total`` is 0 and ``status`` is
    "registered". Optional arguments that were not supplied are stored as None.
    """
    # Caller-supplied metadata first, then the fixed initial values.
    fields = dict(
        book_idx=book_idx,
        title=title,
        author=author,
        description=description,
        cover_url=cover_url,
        cover_path=cover_path,
        book_url=book_url,
        chapters_total=0,
        status="registered",
    )

    log(f"[DB] Registering new book_idx={book_idx} title='{title}'")
    sql_register_book(book_idx, fields)
# ============================================================
# SCRAPE-FLOW UPDATE
# ============================================================
@logcall
def update_book_after_full_scrape(
    book_idx,
    title=None,
    author=None,
    description=None,
    cover_url=None,
    chapters_total=None,
):
    """Update a book's metadata through ``sql_update_book`` and mark it active.

    Only arguments that are not None are included in the update; ``status`` is
    always set to "active".
    """
    candidates = (
        ("title", title),
        ("author", author),
        ("description", description),
        ("cover_url", cover_url),
        ("chapters_total", chapters_total),
    )
    # None means "not supplied": such columns are left out of the update.
    fields = {
        column: value for column, value in candidates if value is not None
    }
    fields["status"] = "active"

    log(f"[DB] update metadata for book_idx={book_idx}")
    sql_update_book(book_idx, fields)
# ============================================================
# ACTIVE BOOK LISTS
# ============================================================
@logcall
def get_registered_books():
    """Return all books whose ``status`` is not "hidden".

    Books come from ``sql_fetch_all_books``; each one is expected to offer
    ``.get("status")``.
    """
    excluded_statuses = {"hidden"}
    books = sql_fetch_all_books()
    log(f"[DB] Fetched all books for registered filter, total={len(books)}")
    return [
        book for book in books if book.get("status") not in excluded_statuses
    ]
@logcall
def get_active_books():
    """Return all books whose ``status`` is neither "hidden" nor "done".

    Books come from ``sql_fetch_all_books``; each one is expected to offer
    ``.get("status")``.
    """
    excluded_statuses = {"hidden", "done"}
    books = sql_fetch_all_books()
    log(f"[DB] Fetched all books for active filter, total={len(books)}")
    return [
        book for book in books if book.get("status") not in excluded_statuses
    ]
# ============================================================
# STATUS MANAGEMENT
# ============================================================
@logcall
def set_status(book_idx, status):
    """Write ``status`` for ``book_idx`` to both stores.

    ``redis_set_status`` is called first, then ``sql_set_status``.
    """
    message = f"[DB] Setting status for {book_idx} to '{status}'"
    log(message)

    redis_set_status(book_idx, status)
    sql_set_status(book_idx, status)
# ============================================================
# CHAPTER TOTALS
# ============================================================
@logcall
def set_chapters_total(book_idx, total):
    """Write the chapter total for ``book_idx`` to both stores.

    ``redis_set_chapters_total`` is called first, then
    ``sql_set_chapters_total``.
    """
    message = f"[DB] Setting chapter total for {book_idx} to {total}"
    log(message)

    redis_set_chapters_total(book_idx, total)
    sql_set_chapters_total(book_idx, total)
    # NOTE: the legacy key (_legacy_set_total) is not written here.
# ============================================================
# COUNTERS — DOWNLOAD
# ============================================================
@logcall
def inc_download_done(book_idx, amount=1):
    """Log the request and forward it to ``redis_inc_download_done``.

    ``amount`` defaults to 1.
    """
    message = f"[DB] Incrementing download done for {book_idx} by {amount}"
    log(message)
    redis_inc_download_done(book_idx, amount)
    # NOTE: neither the SQLite counter (sql_inc_downloaded) nor the legacy
    # progress counter (_legacy_inc_completed) is touched here.
@logcall
def inc_download_skipped(book_idx, amount=1):
    """Log the request and forward it to ``redis_inc_download_skipped``.

    ``amount`` defaults to 1.
    """
    message = f"[DB] Incrementing download skipped for {book_idx} by {amount}"
    log(message)
    redis_inc_download_skipped(book_idx, amount)
    # NOTE: the legacy progress counter (_legacy_inc_skipped) is not touched.
# ============================================================
# COUNTERS — PARSE
# ============================================================
@logcall
def inc_parsed_done(book_idx, amount=1):
    """Log the request and forward it to ``redis_inc_parsed_done``.

    ``amount`` defaults to 1.
    """
    message = f"[DB] Incrementing parsed done for {book_idx} by {amount}"
    log(message)
    redis_inc_parsed_done(book_idx, amount)
    # NOTE: the SQLite counter (sql_inc_parsed) is not touched here.
# ============================================================
# COUNTERS — AUDIO
# ============================================================
@logcall
def inc_audio_skipped(book_idx, amount=1):
    """Log the request and forward it to ``redis_inc_audio_skipped``.

    ``amount`` defaults to 1.
    """
    message = f"[DB] Incrementing audio skipped for {book_idx} by {amount}"
    log(message)
    # NOTE: the SQLite counter (sql_inc_audio_skipped) is not touched here.
    redis_inc_audio_skipped(book_idx, amount)
@logcall
def inc_audio_done(book_idx, amount=1):
    """Log the request and forward it to ``redis_inc_audio_done``.

    ``amount`` defaults to 1.
    """
    message = f"[DB] Incrementing audio done for {book_idx} by {amount}"
    log(message)
    redis_inc_audio_done(book_idx, amount)
    # NOTE: the SQLite counter (sql_inc_audio_done) is not touched here.
# ============================================================
# BACKWARDS COMPATIBILITY SHIMS
# These map the old API (book_id) to the new book_idx-only system
# ============================================================
@logcall
def inc_downloaded(book_idx, amount=1):
    """Backwards-compatible alias that forwards to ``inc_download_done``."""
    result = inc_download_done(book_idx, amount)
    return result
@logcall
def inc_parsed(book_idx, amount=1):
    """Backwards-compatible alias that forwards to ``inc_parsed_done``."""
    result = inc_parsed_done(book_idx, amount)
    return result
@logcall
def inc_audio_done_legacy(book_idx, amount=1):
    """Backwards-compatible alias that forwards to ``inc_audio_done``."""
    result = inc_audio_done(book_idx, amount)
    return result
# ============================================================
# READ — DERIVED BOOK STATE
# ============================================================
@logcall
def get_book_state(book_idx):
    """
    Build the merged read-model for a single book.

    Behaviour:
    - reads the SQL snapshot row through ``sql_fetch_book``
    - reads the Redis hash ``book:{book_idx}:state`` (live counters)
    - combines both into a new dict
    - issues no writes itself

    Merge rules:
    - merged counter = max(sql value, redis value)
    - merged counters are capped at ``chapters_total`` when it is > 0
    - ``status`` is derived from the merged counters when
      ``chapters_total`` > 0; otherwise the SQL status (or "unknown") is kept
    """

    def to_int(value):
        # Anything int() rejects (None, non-numeric text, ...) counts as 0.
        try:
            return int(value)
        except Exception:
            return 0

    # ----------------------------------------------------
    # 1. Fetch both sources
    # ----------------------------------------------------
    snapshot = sql_fetch_book(book_idx) or {}
    live = _r.hgetall(f"book:{book_idx}:state") or {}

    # ----------------------------------------------------
    # 2. SQL snapshot values
    # ----------------------------------------------------
    chapters_total = to_int(snapshot.get("chapters_total"))
    snapshot_downloaded = to_int(snapshot.get("downloaded"))
    snapshot_audio_done = to_int(snapshot.get("audio_done"))

    # ----------------------------------------------------
    # 3. Redis live counters
    # ----------------------------------------------------
    # Skipped downloads count towards "downloaded" on the Redis side.
    live_downloaded = to_int(live.get("chapters_download_done")) + to_int(
        live.get("chapters_download_skipped")
    )
    live_audio_done = to_int(live.get("audio_done"))

    # ----------------------------------------------------
    # 4. Merge: take the larger value, cap at the chapter total
    # ----------------------------------------------------
    downloaded = max(snapshot_downloaded, live_downloaded)
    audio_done = max(snapshot_audio_done, live_audio_done)

    has_total = chapters_total > 0
    if has_total:
        downloaded = min(downloaded, chapters_total)
        audio_done = min(audio_done, chapters_total)

    # ----------------------------------------------------
    # 5. Derive status (read-only)
    # ----------------------------------------------------
    status = snapshot.get("status") or "unknown"
    if has_total:
        # Both counters are capped at chapters_total above, so "not below the
        # total" means "equal to the total" and the three cases are exhaustive.
        if downloaded < chapters_total:
            status = "downloading"
        elif audio_done < chapters_total:
            status = "audio"
        else:
            status = "done"

    # ----------------------------------------------------
    # 6. Build the merged state: SQL row as base, merged values on top
    # ----------------------------------------------------
    return {
        **snapshot,
        "downloaded": downloaded,
        "audio_done": audio_done,
        "chapters_total": chapters_total,
        "status": status,
    }