# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# kmftools/bookscraper/db/repository.py
#
# 304 lines
# 8.5 KiB

# ============================================================
# File: db/repository.py
# Purpose:
# Unified façade for BookScraper database state.
#
# Responsibilities:
# - Route metadata → SQLite
# - Route counters → Redis (live) + SQLite (snapshot)
# - Provide a clean API for tasks and Flask UI
# ============================================================
from scraper.logger_decorators import logcall
from logbus.publisher import log
import redis
import os
import time
# ============================================================
# SQL low-level engines (snapshot storage)
# ============================================================
from db.state_sql import (
sql_fetch_book,
sql_fetch_all_books,
sql_set_status,
sql_set_chapters_total,
sql_register_book,
sql_update_book,
sql_inc_downloaded,
sql_inc_parsed,
sql_inc_audio_done,
sql_inc_audio_skipped,
)
# ============================================================
# REDIS low-level engines (live counters)
# ============================================================
from db.state_redis import (
redis_set_status,
redis_set_chapters_total,
redis_inc_download_done,
redis_inc_download_skipped,
redis_inc_parsed_done,
redis_inc_audio_done,
redis_inc_audio_skipped,
)
# ============================================================
# Redis setup for legacy progress paths
# ============================================================
# Connection URL comes from the REDIS_BROKER environment variable, with a
# fallback default. The client below is the one used by the legacy
# ``progress:*`` / ``abort:*`` helpers in this module.
REDIS_URL = os.environ.get("REDIS_BROKER", "redis://redis:6379/0")
# decode_responses=True makes the client return ``str`` values, not bytes.
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
# ============================================================
# INTERNAL — legacy progress helpers
# ============================================================
def _legacy_set_total(book_id, total):
    """Store ``total`` under the legacy key ``progress:<book_id>:total``."""
    total_key = f"progress:{book_id}:total"
    _r.set(total_key, total)
def _legacy_inc_completed(book_id):
    """Increment the legacy ``progress:<book_id>:completed`` counter."""
    completed_key = f"progress:{book_id}:completed"
    _r.incr(completed_key)
def _legacy_inc_skipped(book_id):
    """Increment the legacy ``progress:<book_id>:skipped`` counter."""
    skipped_key = f"progress:{book_id}:skipped"
    _r.incr(skipped_key)
def _legacy_inc_failed(book_id):
    """Increment the legacy ``progress:<book_id>:failed`` counter."""
    failed_key = f"progress:{book_id}:failed"
    _r.incr(failed_key)
def _legacy_add_failed_chapter(book_id, chapter, reason):
    """Append a ``Chapter <chapter>: <reason>`` entry to the legacy failed list.

    The entry is pushed to the tail of the Redis list
    ``progress:<book_id>:failed_list``.
    """
    message = f"Chapter {chapter}: {reason}"
    list_key = f"progress:{book_id}:failed_list"
    _r.rpush(list_key, message)
def _legacy_get_failed_list(book_id):
    """Return every entry of the legacy ``progress:<book_id>:failed_list`` list."""
    list_key = f"progress:{book_id}:failed_list"
    # The 0..-1 range selects the whole list.
    return _r.lrange(list_key, 0, -1)
def _legacy_get_progress(book_id):
    """Collect the legacy progress state of ``book_id`` into one dict.

    Returns:
        A dict with the keys ``book_id``, ``total``, ``completed``,
        ``skipped``, ``failed`` (ints, 0 when the Redis key is missing or
        empty), ``failed_list`` (entries of the failed-chapter list) and
        ``abort`` (True when the key ``abort:<book_id>`` exists).
    """
    prefix = f"progress:{book_id}"
    # Missing counters come back falsy from Redis and are reported as 0.
    counters = {
        name: int(_r.get(f"{prefix}:{name}") or 0)
        for name in ("total", "completed", "skipped", "failed")
    }
    abort_requested = _r.exists(f"abort:{book_id}") == 1
    return {
        "book_id": book_id,
        **counters,
        "failed_list": _legacy_get_failed_list(book_id),
        "abort": abort_requested,
    }
# ============================================================
# PUBLIC — UI-ready legacy progress access
# ============================================================
@logcall
def get_progress(book_id):
    """Return the legacy progress dict for ``book_id``.

    Delegates to ``_legacy_get_progress``; the dict holds ``book_id``,
    ``total``, ``completed``, ``skipped``, ``failed``, ``failed_list`` and
    ``abort``.
    """
    progress = _legacy_get_progress(book_id)
    return progress
@logcall
def add_failed_chapter(book_id, chapter, reason):
    """Record a failed chapter and its reason in the legacy failed list.

    Args:
        book_id: Book the chapter belongs to.
        chapter: Chapter identifier, interpolated into the stored entry.
        reason: Failure description, interpolated into the stored entry.
    """
    _legacy_add_failed_chapter(book_id, chapter, reason)
@logcall
def get_failed_list(book_id):
    """Return the legacy list of failed-chapter entries for ``book_id``."""
    entries = _legacy_get_failed_list(book_id)
    return entries
# ============================================================
# FETCH OPERATIONS (SQLite snapshot)
# ============================================================
@logcall
def fetch_book(book_id):
    """Return the stored record for ``book_id`` as given by ``sql_fetch_book``."""
    record = sql_fetch_book(book_id)
    return record
@logcall
def fetch_all_books():
    """Return all stored book records as given by ``sql_fetch_all_books``."""
    records = sql_fetch_all_books()
    return records
# ============================================================
# INIT-FLOW (SQLite metadata only)
# ============================================================
@logcall
def register_book(
    book_id,
    title,
    author=None,
    description=None,
    cover_url=None,
    cover_path=None,
    book_url=None,
):
    """Create the initial stored record for a book.

    The record carries the given metadata plus ``chapters_total`` 0 and
    status ``"registered"``, and is handed to ``sql_register_book``. No Redis
    call is made here.

    Args:
        book_id: Identifier under which the book is registered.
        title: Book title (also written to the log line).
        author, description, cover_url, cover_path, book_url: Optional
            metadata; stored as ``None`` when not supplied.
    """
    record = dict(
        title=title,
        author=author,
        description=description,
        cover_url=cover_url,
        cover_path=cover_path,
        book_url=book_url,
        chapters_total=0,
        status="registered",
    )
    log(f"[DB] Registering new book={book_id} title='{title}'")
    sql_register_book(book_id, record)
@logcall
def update_book_after_full_scrape(
    book_id,
    title=None,
    author=None,
    description=None,
    cover_url=None,
    chapters_total=None,
):
    """Update a book's metadata after a full scrape and mark it active.

    Only arguments that are not ``None`` are included in the update; the
    status is always set to ``"active"``. The update is passed to
    ``sql_update_book``.

    Args:
        book_id: Identifier of the book to update.
        title, author, description, cover_url, chapters_total: New values;
            ``None`` leaves the corresponding field out of the update.
    """
    candidates = {
        "title": title,
        "author": author,
        "description": description,
        "cover_url": cover_url,
        "chapters_total": chapters_total,
    }
    # Leave out anything the caller did not provide.
    fields = {key: value for key, value in candidates.items() if value is not None}
    fields["status"] = "active"
    log(f"[DB] update full scrape metadata book={book_id}")
    sql_update_book(book_id, fields)
# ============================================================
# ACTIVE BOOK LISTS
# ============================================================
@logcall
def get_registered_books():
    """Return the stored books whose ``status`` equals ``"registered"``."""
    return [
        book
        for book in sql_fetch_all_books()
        if book.get("status") == "registered"
    ]
@logcall
def get_active_books():
    """Return the stored books whose ``status`` is ``"active"`` or ``"downloading"``.

    The total number of fetched books is logged before filtering.
    """
    wanted_statuses = ("active", "downloading")
    all_books = sql_fetch_all_books()
    log(f"[DB] Fetched all books for active filter, total={len(all_books)}")
    return [book for book in all_books if book.get("status") in wanted_statuses]
# ============================================================
# STATUS MANAGEMENT
# ============================================================
@logcall
def set_status(book_id, status):
    """Set the status of ``book_id`` in Redis first, then in the SQL store.

    Args:
        book_id: Identifier of the book.
        status: New status value, passed unchanged to both engines.
    """
    log(f"[DB] Setting status for {book_id} to '{status}'")
    # Redis is written before the SQL snapshot.
    redis_set_status(book_id, status)
    sql_set_status(book_id, status)
# ============================================================
# CHAPTER TOTALS
# ============================================================
@logcall
def set_chapters_total(book_id, total):
    """Set the chapter total of ``book_id`` in all three places.

    Written in order: the Redis engine, the SQL engine, and the legacy
    ``progress:<book_id>:total`` key.

    Args:
        book_id: Identifier of the book.
        total: Chapter count, passed unchanged to each writer.
    """
    log(f"[DB] Setting chapter total for {book_id} to {total}")
    redis_set_chapters_total(book_id, total)
    sql_set_chapters_total(book_id, total)
    # Keep the legacy progress key in step with the new stores.
    _legacy_set_total(book_id, total)
# ============================================================
# COUNTERS — DOWNLOAD
# ============================================================
@logcall
def inc_download_done(book_id, amount=1):
    """Record ``amount`` completed downloads for ``book_id``.

    Updates, in order: the Redis counter, the SQL counter, and the legacy
    ``progress:<book_id>:completed`` key.

    Args:
        book_id: Identifier of the book whose counters are updated.
        amount: Number of downloads to add (defaults to 1).
    """
    log(f"[DB] Incrementing download done for {book_id} by {amount}")
    redis_inc_download_done(book_id, amount)
    sql_inc_downloaded(book_id, amount)
    # The legacy counter used to go up by exactly 1 regardless of ``amount``,
    # so it drifted from the other two counters whenever amount != 1.
    # Apply the same amount to the legacy key.
    _r.incr(f"progress:{book_id}:completed", amount)
@logcall
def inc_download_skipped(book_id, amount=1):
    """Record ``amount`` skipped downloads for ``book_id``.

    Updates the Redis counter and then the legacy
    ``progress:<book_id>:skipped`` key. No SQL counter is written here.

    Args:
        book_id: Identifier of the book whose counters are updated.
        amount: Number of skipped downloads to add (defaults to 1).
    """
    log(f"[DB] Incrementing download skipped for {book_id} by {amount}")
    redis_inc_download_skipped(book_id, amount)
    # The legacy counter used to go up by exactly 1 regardless of ``amount``,
    # so it drifted from the Redis counter whenever amount != 1.
    # Apply the same amount to the legacy key.
    _r.incr(f"progress:{book_id}:skipped", amount)
# ============================================================
# COUNTERS — PARSE
# ============================================================
@logcall
def inc_parsed_done(book_id, amount=1):
    """Add ``amount`` to the parsed counters of ``book_id``.

    The Redis counter is updated first, then the SQL counter.

    Args:
        book_id: Identifier of the book whose counters are updated.
        amount: Increment to apply (defaults to 1).
    """
    log(f"[DB] Incrementing parsed done for {book_id} by {amount}")
    redis_inc_parsed_done(book_id, amount)
    sql_inc_parsed(book_id, amount)
# ============================================================
# COUNTERS — AUDIO
# ============================================================
# ============================================================
# COUNTERS — AUDIO SKIPPED
# ============================================================
@logcall
def inc_audio_skipped(book_id, amount=1):
    """Add ``amount`` to the skipped-audio counters of ``book_id``.

    Args:
        book_id: Identifier of the book whose counters are updated.
        amount: Increment to apply (defaults to 1).
    """
    log(f"[DB] Incrementing audio skipped for {book_id} by {amount}")
    # NOTE(review): here the SQL counter is written before the Redis one,
    # the reverse of the other counter functions in this module. The order
    # is kept as found; confirm whether it is intentional.
    sql_inc_audio_skipped(book_id, amount)
    redis_inc_audio_skipped(book_id, amount)
@logcall
def inc_audio_done(book_id, amount=1):
    """Add ``amount`` to the finished-audio counters of ``book_id``.

    The Redis counter is updated first, then the SQL counter.

    Args:
        book_id: Identifier of the book whose counters are updated.
        amount: Increment to apply (defaults to 1).
    """
    log(f"[DB] Incrementing audio done for {book_id} by {amount}")
    redis_inc_audio_done(book_id, amount)
    sql_inc_audio_done(book_id, amount)
# ============================================================
# BACKWARDS COMPATIBILITY SHIMS (old task API)
# ============================================================
@logcall
def inc_downloaded(book_id, amount=1):
    """Backward-compatible alias for ``inc_download_done``.

    Kept for older tasks that still call the previous name. Arguments are
    passed through unchanged and the delegate's result is returned.
    """
    result = inc_download_done(book_id, amount)
    return result
@logcall
def inc_parsed(book_id, amount=1):
    """Backward-compatible alias for ``inc_parsed_done``.

    Kept for older tasks that still call the previous name. Arguments are
    passed through unchanged and the delegate's result is returned.
    """
    result = inc_parsed_done(book_id, amount)
    return result
@logcall
def inc_audio_done_legacy(book_id, amount=1):
    """Backward-compatible alias for ``inc_audio_done``.

    Kept for older tasks that still call the old audio name. Arguments are
    passed through unchanged and the delegate's result is returned.
    """
    result = inc_audio_done(book_id, amount)
    return result