|
|
|
|
@ -1,15 +1,22 @@
|
|
|
|
|
# ============================================================
|
|
|
|
|
# File: scraper/progress.py
|
|
|
|
|
# Purpose: Track chapter counters for WebGUI progress.
|
|
|
|
|
# Purpose: Track chapter counters for WebGUI progress +
|
|
|
|
|
# Book State Model (Redis-backed).
|
|
|
|
|
# ============================================================
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
import redis
|
|
|
|
|
|
|
|
|
|
# Redis connection URL, taken from the REDIS_BROKER environment variable;
# falls back to "redis://redis:6379/0" when the variable is not set.
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
|
|
|
|
# Module-level Redis client used by the functions in this file.
# decode_responses=True makes the client return str values instead of bytes.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================
|
|
|
|
|
# LEGACY PROGRESS FUNCTIONS (KEPT UNCHANGED)
|
|
|
|
|
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# SET TOTAL
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
@ -45,7 +52,7 @@ def get_failed_list(book_id: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# READ STRUCT FOR UI
|
|
|
|
|
# READ STRUCT FOR UI (legacy view)
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def get_progress(book_id: str):
|
|
|
|
|
total = int(r.get(f"progress:{book_id}:total") or 0)
|
|
|
|
|
@ -64,3 +71,118 @@ def get_progress(book_id: str):
|
|
|
|
|
"failed_list": failed_list,
|
|
|
|
|
"abort": abort,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================
|
|
|
|
|
# BOOK STATE MODEL (NEW FUNCTIONS — NO BREAKING CHANGES)
|
|
|
|
|
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# Initialize book state at start of scrape
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def init_book_state(
    book_id: str, title: str = "", url: str = "", chapters_total: int = 0
):
    """
    Write the initial state hash for a book and register it in the library.

    The hash ``book:{book_id}:state`` is (over)written with status
    ``"scraping"``, the given chapter total, zeroed progress counters and
    the current Unix time as ``last_update``. The book id is then added to
    the ``books`` set.

    Args:
        book_id: Identifier used to build the Redis key.
        title: Book title; a falsy value is stored as an empty string.
        url: Source URL; a falsy value is stored as an empty string.
        chapters_total: Number of chapters expected for this book.
    """
    initial_state = {
        "book_id": book_id,
        "title": title if title else "",
        "url": url if url else "",
        "status": "scraping",
        "chapters_total": chapters_total,
        "chapters_done": 0,
        "audio_total": 0,
        "audio_done": 0,
        "last_update": int(time.time()),
    }
    r.hset(f"book:{book_id}:state", mapping=initial_state)

    # Track in library list
    r.sadd("books", book_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# Status + timestamps
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def set_status(book_id: str, status: str):
    """
    Set the status of a book and refresh its ``last_update`` timestamp.

    Both fields are written with a single HSET so that a concurrent reader
    never observes the new status paired with a stale timestamp, and only
    one round-trip to Redis is needed.

    Args:
        book_id: Identifier used to build the ``book:{book_id}:state`` key.
        status: New status value to store in the ``status`` field.
    """
    r.hset(
        f"book:{book_id}:state",
        mapping={
            "status": status,
            "last_update": int(time.time()),
        },
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_last_update(book_id: str):
    """
    Stamp ``last_update`` in the book's state hash with the current Unix
    time, truncated to whole seconds.
    """
    state_key = f"book:{book_id}:state"
    timestamp = int(time.time())
    r.hset(state_key, "last_update", timestamp)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# Chapter counters
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def set_chapter_total(book_id: str, total: int):
    """
    Store ``total`` in the ``chapters_total`` field of the book's state
    hash, then refresh the ``last_update`` timestamp.
    """
    r.hset(f"book:{book_id}:state", "chapters_total", total)
    set_last_update(book_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inc_chapter_done(book_id: str):
    """
    Increment the ``chapters_done`` counter of the book's state hash by one,
    then refresh the ``last_update`` timestamp.
    """
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "chapters_done", amount=1)
    set_last_update(book_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# Audio counters
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def set_audio_total(book_id: str, total: int):
    """
    Store ``total`` in the ``audio_total`` field of the book's state hash,
    then refresh the ``last_update`` timestamp.
    """
    r.hset(f"book:{book_id}:state", "audio_total", total)
    set_last_update(book_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inc_audio_done(book_id: str):
    """
    Increment the ``audio_done`` counter of the book's state hash by one,
    then refresh the ``last_update`` timestamp.
    """
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "audio_done", amount=1)
    set_last_update(book_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# Skip reasons
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def save_skip_reason(book_id: str, chapter: int, reason: str):
    """
    Record why a chapter was skipped, for transparency in the UI.

    The reason is stored in the hash ``book:{book_id}:skip_reasons`` under
    the chapter number as field name; afterwards the book's ``last_update``
    timestamp is refreshed.
    """
    reasons_key = f"book:{book_id}:skip_reasons"
    r.hset(reasons_key, chapter, reason)
    set_last_update(book_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
# Full state readout
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
def get_state(book_id: str):
    """
    Return the Book State Model for ``book_id`` together with the legacy
    progress view, merged into one dict but kept under separate keys.

    The returned dict contains the fields of ``book:{book_id}:state`` (with
    the four counter fields converted to ``int`` where possible), plus:

    - ``skip_reasons``: contents of ``book:{book_id}:skip_reasons``
    - ``legacy_progress``: result of ``get_progress(book_id)``
    """
    snapshot = r.hgetall(f"book:{book_id}:state") or {}

    # Convert the counters to int; absent fields stay absent.
    for counter in ("chapters_total", "chapters_done", "audio_total", "audio_done"):
        if counter not in snapshot:
            continue
        try:
            snapshot[counter] = int(snapshot[counter])
        except ValueError:
            # Best effort: a non-numeric value is left as stored.
            continue

    # Skip reasons
    snapshot["skip_reasons"] = r.hgetall(f"book:{book_id}:skip_reasons") or {}

    # Legacy progress is attached under its own key, not mixed into the state.
    snapshot["legacy_progress"] = get_progress(book_id)

    return snapshot
|
|
|
|
|
|