You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/progress.py

209 lines
6.1 KiB

# ============================================================
# File: scraper/progress.py
# Purpose: Track chapter counters for WebGUI progress +
# Book State Model (Redis-backed).
# ============================================================
import os
import time
import redis
# Redis connection URL, taken from the REDIS_BROKER environment variable;
# falls back to "redis://redis:6379/0" when the variable is not set.
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
# Module-level client shared by every function in this file.
# decode_responses=True makes redis-py return str values instead of bytes.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
# ============================================================
# LEGACY PROGRESS FUNCTIONS (KEPT UNTOUCHED)
# ============================================================
# ------------------------------------------------------------
# SET TOTAL
# ------------------------------------------------------------
def set_total(book_id: str, total: int):
    """Store *total* under the legacy ``progress:<book_id>:total`` key."""
    total_key = f"progress:{book_id}:total"
    r.set(total_key, total)
# ------------------------------------------------------------
# COUNTERS legacy
# ------------------------------------------------------------
def inc_completed(book_id: str):
    """Increment the legacy "completed" counter of *book_id* by one."""
    counter_key = f"progress:{book_id}:completed"
    r.incr(counter_key)
def inc_skipped(book_id: str):
    """Increment the legacy "skipped" counter of *book_id* by one."""
    counter_key = f"progress:{book_id}:skipped"
    r.incr(counter_key)
def inc_failed(book_id: str):
    """Increment the legacy "failed" counter of *book_id* by one."""
    counter_key = f"progress:{book_id}:failed"
    r.incr(counter_key)
# ------------------------------------------------------------
# FAILED CHAPTER LIST
# ------------------------------------------------------------
def add_failed_chapter(book_id: str, chapter: int, reason: str):
    """Append a ``Chapter <chapter>: <reason>`` line to the failed list.

    The entry is pushed onto the tail of the Redis list
    ``progress:<book_id>:failed_list``.
    """
    list_key = f"progress:{book_id}:failed_list"
    r.rpush(list_key, f"Chapter {chapter}: {reason}")
def get_failed_list(book_id: str):
    """Return all entries of ``progress:<book_id>:failed_list`` in list order."""
    list_key = f"progress:{book_id}:failed_list"
    entries = r.lrange(list_key, 0, -1)  # 0..-1 selects the whole list
    return entries
# ------------------------------------------------------------
# READ STRUCT FOR UI (legacy view)
# ------------------------------------------------------------
def get_progress(book_id: str):
    """Collect the legacy progress view for *book_id* as a plain dict.

    Returns a dict with the keys ``book_id``, ``total``, ``completed``,
    ``skipped``, ``failed`` (ints, 0 when the key is missing or empty),
    ``failed_list`` (entries of the failed-chapter list) and ``abort``
    (True when the ``abort:<book_id>`` key exists).
    """

    def read_counter(name: str) -> int:
        # A missing key comes back as None; treat it (and "") as zero.
        raw = r.get(f"progress:{book_id}:{name}")
        return int(raw) if raw else 0

    counters = {
        name: read_counter(name)
        for name in ("total", "completed", "skipped", "failed")
    }
    aborted = r.exists(f"abort:{book_id}") == 1
    failures = get_failed_list(book_id)

    return {
        "book_id": book_id,
        **counters,
        "failed_list": failures,
        "abort": aborted,
    }
# ============================================================
# BOOK STATE MODEL (NEW FUNCTIONS — NO BREAKING CHANGES)
# ============================================================
# ------------------------------------------------------------
# Initialize book state at start of scrape
# ------------------------------------------------------------
def init_book_state(
    book_id: str, title: str = "", url: str = "", chapters_total: int = 0
):
    """Seed the ``book:<book_id>:state`` hash at the start of a scrape.

    Writes the initial field values (overwriting any existing values of
    those fields), sets ``status`` to ``"scraping"`` and adds *book_id* to
    the ``books`` set.

    Args:
        book_id: Identifier used to build the Redis keys.
        title: Book title; a falsy value is stored as "".
        url: Source URL; a falsy value is stored as "".
        chapters_total: Initial value for the ``chapters_total`` field.
    """
    state_key = f"book:{book_id}:state"
    started_at = int(time.time())

    initial_state = {
        "book_id": book_id,
        "title": title or "",
        "url": url or "",
        "status": "scraping",
        "chapters_total": chapters_total,
        # Kept for readers that still expect the older field name.
        "chapters_done": 0,
        # Field actually written by inc_chapter_done() and converted by
        # get_state(); seeding it here also resets it on re-initialisation.
        "chapters_download_done": 0,
        "chapters_download_skipped": 0,
        "audio_total": 0,
        "audio_done": 0,
        # Written by inc_audio_skipped(); seeded so it is present and reset.
        "audio_skipped": 0,
        "last_update": started_at,
    }
    r.hset(state_key, mapping=initial_state)

    # Track in library list
    r.sadd("books", book_id)
# ------------------------------------------------------------
# Status + timestamps
# ------------------------------------------------------------
def set_status(book_id: str, status: str):
    """Set the ``status`` field of the book state and refresh ``last_update``.

    Both fields are written with a single HSET so a concurrent reader never
    sees the new status paired with a stale timestamp, and only one round
    trip to Redis is needed.
    """
    state_key = f"book:{book_id}:state"
    r.hset(
        state_key,
        mapping={
            "status": status,
            "last_update": int(time.time()),
        },
    )
def set_last_update(book_id: str):
    """Write the current Unix time (whole seconds) to ``last_update``."""
    state_key = f"book:{book_id}:state"
    timestamp = int(time.time())
    r.hset(state_key, "last_update", timestamp)
# ------------------------------------------------------------
# Chapter counters new model
# ------------------------------------------------------------
def set_chapter_total(book_id: str, total: int):
    """Store *total* in ``chapters_total`` and refresh ``last_update``."""
    state_key = f"book:{book_id}:state"
    r.hset(state_key, "chapters_total", total)
    set_last_update(book_id)
def inc_chapter_download_skipped(book_id: str):
    """Add one to ``chapters_download_skipped`` and refresh ``last_update``."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "chapters_download_skipped", 1)
    set_last_update(book_id)
def inc_chapter_done(book_id: str):
    """Add one to ``chapters_download_done`` and refresh ``last_update``."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "chapters_download_done", 1)
    set_last_update(book_id)
# ------------------------------------------------------------
# Audio counters
# ------------------------------------------------------------
def set_audio_total(book_id: str, total: int):
    """Store *total* in ``audio_total`` and refresh ``last_update``."""
    state_key = f"book:{book_id}:state"
    r.hset(state_key, "audio_total", total)
    set_last_update(book_id)
def inc_audio_done(book_id: str):
    """Add one to ``audio_done`` and refresh ``last_update``."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "audio_done", 1)
    set_last_update(book_id)
def inc_audio_skipped(book_id: str):
    """Add one to ``audio_skipped`` and refresh ``last_update``."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "audio_skipped", 1)
    set_last_update(book_id)
# ------------------------------------------------------------
# Skip reasons
# ------------------------------------------------------------
def save_skip_reason(book_id: str, chapter: int, reason: str):
    """Record why *chapter* was skipped so the UI can show it.

    The reason is stored in the ``book:<book_id>:skip_reasons`` hash with
    the chapter as field name; ``last_update`` is refreshed afterwards.
    """
    reasons_key = f"book:{book_id}:skip_reasons"
    r.hset(reasons_key, chapter, reason)
    set_last_update(book_id)
# ------------------------------------------------------------
# Full state readout
# ------------------------------------------------------------
def get_state(book_id: str):
    """Return the Book State Model of *book_id* together with legacy progress.

    The ``book:<book_id>:state`` hash is read and its counter fields are
    converted to int where possible. Two extra keys are attached:
    ``skip_reasons`` (contents of the skip-reasons hash) and
    ``legacy_progress`` (result of ``get_progress``), so the legacy data
    stays separate from the new-model fields.
    """
    snapshot = r.hgetall(f"book:{book_id}:state") or {}

    # Fields that hold counters and should be exposed as integers.
    counter_fields = (
        "chapters_total",
        "chapters_download_done",
        "chapters_download_skipped",
        "audio_total",
        "audio_skipped",
        "audio_done",
    )
    for name in counter_fields:
        if name not in snapshot:
            continue
        try:
            snapshot[name] = int(snapshot[name])
        except ValueError:
            # Non-numeric content is left as the raw stored value.
            continue

    # Per-chapter skip reasons live in their own hash.
    snapshot["skip_reasons"] = r.hgetall(f"book:{book_id}:skip_reasons") or {}

    # Legacy counters are nested under their own key, not merged in.
    snapshot["legacy_progress"] = get_progress(book_id)
    return snapshot