You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
209 lines
6.1 KiB
209 lines
6.1 KiB
# ============================================================
|
|
# File: scraper/progress.py
|
|
# Purpose: Track chapter counters for WebGUI progress +
|
|
# Book State Model (Redis-backed).
|
|
# ============================================================
|
|
|
|
import os
|
|
import time
|
|
import redis
|
|
|
|
# Connection URL is taken from the REDIS_BROKER environment variable,
# falling back to the default "redis" host on port 6379, database 0.
REDIS_URL = os.environ.get("REDIS_BROKER", "redis://redis:6379/0")

# Shared module-level client used by every function below.
# decode_responses=True asks redis-py to return str values instead of bytes.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
|
|
|
|
|
# ============================================================
|
|
# LEGACY PROGRESS FUNCTIONS (KEPT UNCHANGED)
|
|
# ============================================================
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# SET TOTAL
|
|
# ------------------------------------------------------------
|
|
def set_total(book_id: str, total: int):
    """Store the total chapter count under the legacy progress key."""
    total_key = f"progress:{book_id}:total"
    r.set(total_key, total)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# COUNTERS legacy
|
|
# ------------------------------------------------------------
|
|
def inc_completed(book_id: str):
    """Increment the legacy "completed" counter for this book by one."""
    counter_key = f"progress:{book_id}:completed"
    r.incr(counter_key)
|
|
|
|
|
|
def inc_skipped(book_id: str):
    """Increment the legacy "skipped" counter for this book by one."""
    counter_key = f"progress:{book_id}:skipped"
    r.incr(counter_key)
|
|
|
|
|
|
def inc_failed(book_id: str):
    """Increment the legacy "failed" counter for this book by one."""
    counter_key = f"progress:{book_id}:failed"
    r.incr(counter_key)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# FAILED CHAPTER LIST
|
|
# ------------------------------------------------------------
|
|
def add_failed_chapter(book_id: str, chapter: int, reason: str):
    """Append a "Chapter N: reason" entry to the book's failed-chapter list."""
    list_key = f"progress:{book_id}:failed_list"
    # RPUSH appends at the tail of the list.
    r.rpush(list_key, f"Chapter {chapter}: {reason}")
|
|
|
|
|
|
def get_failed_list(book_id: str):
    """Return every entry of the book's failed-chapter list."""
    list_key = f"progress:{book_id}:failed_list"
    # The 0..-1 range selects the whole list.
    entries = r.lrange(list_key, 0, -1)
    return entries
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# READ STRUCT FOR UI (legacy view)
|
|
# ------------------------------------------------------------
|
|
def get_progress(book_id: str):
    """Build the legacy progress snapshot for the UI.

    Returns a dict with ``book_id``, the four integer counters (``total``,
    ``completed``, ``skipped``, ``failed``; a missing key reads as 0), the
    ``failed_list`` entries, and ``abort`` (True when the key
    ``abort:{book_id}`` exists).
    """
    counter_keys = (
        ("total", f"progress:{book_id}:total"),
        ("completed", f"progress:{book_id}:completed"),
        ("skipped", f"progress:{book_id}:skipped"),
        ("failed", f"progress:{book_id}:failed"),
    )

    snapshot = {"book_id": book_id}
    for field, redis_key in counter_keys:
        raw = r.get(redis_key)
        # An unset counter comes back as None and is treated as zero.
        snapshot[field] = int(raw or 0)

    abort_requested = r.exists(f"abort:{book_id}") == 1

    snapshot["failed_list"] = get_failed_list(book_id)
    snapshot["abort"] = abort_requested
    return snapshot
|
|
|
|
|
|
# ============================================================
|
|
# BOOK STATE MODEL (NEW FUNCTIONS — NO BREAKING CHANGES)
|
|
# ============================================================
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Initialize book state at start of scrape
|
|
# ------------------------------------------------------------
|
|
def init_book_state(
    book_id: str, title: str = "", url: str = "", chapters_total: int = 0
):
    """Create (or reset) the state hash for a book at the start of a scrape.

    Writes the hash ``book:{book_id}:state`` with status ``"scraping"``, the
    given metadata, all counters set to zero and ``last_update`` set to the
    current Unix time, then adds ``book_id`` to the ``books`` set.

    Args:
        book_id: Identifier used to build the Redis keys.
        title: Book title; a falsy value is stored as an empty string.
        url: Source URL; a falsy value is stored as an empty string.
        chapters_total: Number of chapters expected for this book.
    """
    key = f"book:{book_id}:state"
    now = int(time.time())

    r.hset(
        key,
        mapping={
            "book_id": book_id,
            "title": title or "",
            "url": url or "",
            "status": "scraping",
            "chapters_total": chapters_total,
            # Kept for compatibility; nothing in this module updates it.
            # NOTE(review): confirm whether any reader still uses it.
            "chapters_done": 0,
            # inc_chapter_done() and get_state() use this field name, so it
            # is seeded here to be present before the first increment.
            "chapters_download_done": 0,
            "chapters_download_skipped": 0,
            "audio_total": 0,
            "audio_done": 0,
            # Seeded for the same reason: inc_audio_skipped() and get_state()
            # both reference it.
            "audio_skipped": 0,
            "last_update": now,
        },
    )

    # Track in library list
    r.sadd("books", book_id)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Status + timestamps
|
|
# ------------------------------------------------------------
|
|
def set_status(book_id: str, status: str):
    """Set the book's status and refresh its ``last_update`` timestamp.

    Both fields are written with a single HSET, so a concurrent reader never
    sees the new status paired with the previous timestamp, and only one
    round trip to Redis is needed.

    Args:
        book_id: Identifier used to build the ``book:{book_id}:state`` key.
        status: New status value to store.
    """
    r.hset(
        f"book:{book_id}:state",
        mapping={
            "status": status,
            "last_update": int(time.time()),
        },
    )
|
|
|
|
|
|
def set_last_update(book_id: str):
    """Stamp the book's state hash with the current Unix time (whole seconds)."""
    state_key = f"book:{book_id}:state"
    timestamp = int(time.time())
    r.hset(state_key, "last_update", timestamp)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Chapter counters new model
|
|
# ------------------------------------------------------------
|
|
def set_chapter_total(book_id: str, total: int):
    """Store ``chapters_total`` in the book's state hash and refresh the timestamp."""
    state_key = f"book:{book_id}:state"
    r.hset(state_key, "chapters_total", total)
    set_last_update(book_id)
|
|
|
|
|
|
def inc_chapter_download_skipped(book_id: str):
    """Add one to ``chapters_download_skipped`` and refresh the timestamp."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "chapters_download_skipped", 1)
    set_last_update(book_id)
|
|
|
|
|
|
def inc_chapter_done(book_id: str):
    """Add one to ``chapters_download_done`` and refresh the timestamp."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "chapters_download_done", 1)
    set_last_update(book_id)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Audio counters
|
|
# ------------------------------------------------------------
|
|
def set_audio_total(book_id: str, total: int):
    """Store ``audio_total`` in the book's state hash and refresh the timestamp."""
    state_key = f"book:{book_id}:state"
    r.hset(state_key, "audio_total", total)
    set_last_update(book_id)
|
|
|
|
|
|
def inc_audio_done(book_id: str):
    """Add one to ``audio_done`` and refresh the timestamp."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "audio_done", 1)
    set_last_update(book_id)
|
|
|
|
|
|
def inc_audio_skipped(book_id: str):
    """Add one to ``audio_skipped`` and refresh the timestamp."""
    state_key = f"book:{book_id}:state"
    r.hincrby(state_key, "audio_skipped", 1)
    set_last_update(book_id)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Skip reasons
|
|
# ------------------------------------------------------------
|
|
def save_skip_reason(book_id: str, chapter: int, reason: str):
    """Record why a chapter was skipped so the UI can show it.

    The reason is stored in the hash ``book:{book_id}:skip_reasons`` with the
    chapter number as the field, and the book's timestamp is refreshed.
    """
    reasons_key = f"book:{book_id}:skip_reasons"
    r.hset(reasons_key, chapter, reason)
    set_last_update(book_id)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# Full state readout
|
|
# ------------------------------------------------------------
|
|
def get_state(book_id: str):
    """Return the book's state hash together with its auxiliary data.

    The result is the content of ``book:{book_id}:state`` with the known
    counter fields converted to ``int`` where possible, plus two extra keys:
    ``skip_reasons`` (the ``book:{book_id}:skip_reasons`` hash) and
    ``legacy_progress`` (the dict produced by ``get_progress``), which is
    attached separately rather than merged into the state fields.
    """
    snapshot = r.hgetall(f"book:{book_id}:state") or {}

    counter_names = (
        "chapters_total",
        "chapters_download_done",
        "chapters_download_skipped",
        "audio_total",
        "audio_skipped",
        "audio_done",
    )
    for name in counter_names:
        if name not in snapshot:
            continue
        try:
            snapshot[name] = int(snapshot[name])
        except ValueError:
            # Best effort: a non-numeric value is left exactly as stored.
            continue

    # Explicit skip reasons recorded per chapter.
    snapshot["skip_reasons"] = r.hgetall(f"book:{book_id}:skip_reasons") or {}

    # Legacy counters live under their own key, apart from the state fields.
    snapshot["legacy_progress"] = get_progress(book_id)

    return snapshot
|