Compare commits
30 Commits
celery-int
...
main
@ -1 +1,4 @@
|
||||
output/
|
||||
venv/
|
||||
*.log
|
||||
__pycache__/
|
||||
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
"""
Local macOS Audio Worker — runs outside Docker so macOS 'say' works.
"""

import os
import subprocess

from dotenv import load_dotenv

# Resolve paths relative to this script, not the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ENV_FILE = os.path.join(BASE_DIR, ".env")

# Load .env if present (best-effort: missing .env only produces a warning,
# the worker then relies on whatever is already in the environment).
if os.path.exists(ENV_FILE):
    load_dotenv(ENV_FILE)
    print(f"[AUDIO-LOCAL] Loaded .env from {ENV_FILE}")
else:
    print("[AUDIO-LOCAL] WARNING: no .env found")
|
||||
|
||||
|
||||
def main():
    """Start the local audio Celery worker, pointing Celery at a localhost Redis.

    Overrides CELERY_BROKER_URL / CELERY_RESULT_BACKEND in the environment so
    the worker subprocess talks to 127.0.0.1 instead of the Docker-internal
    'redis' hostname, then launches the Celery worker via subprocess.
    """
    banner = "====================================================="
    print(banner)
    print(" LOCAL macOS AUDIO WORKER")
    print(" Queue : audio")
    print(" Voice :", os.getenv("AUDIO_VOICE"))
    print(" Rate :", os.getenv("AUDIO_RATE"))
    print(banner)

    # ----------------------------------------------------------
    # OVERRIDES: Local Redis instead of Docker internal hostname
    # ----------------------------------------------------------
    broker_url = os.getenv("REDIS_BROKER_LOCAL", "redis://127.0.0.1:6379/0")
    backend_url = os.getenv("REDIS_BACKEND_LOCAL", "redis://127.0.0.1:6379/1")

    os.environ["CELERY_BROKER_URL"] = broker_url
    os.environ["CELERY_RESULT_BACKEND"] = backend_url

    print(f"[AUDIO-LOCAL] Using Redis broker : {broker_url}")
    print(f"[AUDIO-LOCAL] Using Redis backend: {backend_url}")

    # ----------------------------------------------------------
    # Celery command — macOS requires the prefork pool.
    # ----------------------------------------------------------
    worker_cmd = (
        "celery -A celery_app worker "
        "-Q audio -n audio_local@%h -l INFO "
        "--pool=prefork --concurrency=2"
    ).split()

    print("[AUDIO-LOCAL] Launching Celery via subprocess…")

    # check=False: the worker's exit status is intentionally not treated
    # as an error here (Ctrl-C shutdown would otherwise raise).
    subprocess.run(worker_cmd, check=False)


if __name__ == "__main__":
    main()
|
||||
@ -0,0 +1,160 @@
|
||||
# ============================================================
# File: db/db.py (UPDATED for book_idx-only architecture)
# Purpose:
#   Raw SQLite engine for BookScraper.
#   - Connection management
#   - init_db() schema creation + safe schema upgrade
#   - upsert_book() atomic write (now uses book_idx)
#   - raw fetch helpers
# ============================================================

import os
import sqlite3
from threading import Lock

# Database file; override location with the BOOKSCRAPER_DB env var.
DB_PATH = os.environ.get("BOOKSCRAPER_DB", "/app/data/books.db")

# Ensure directory exists
# NOTE(review): assumes the parent directory is writable (Docker volume) —
# confirm for local runs where /app/data may not exist.
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

# Per-process connection cache: maps os.getpid() -> sqlite3.Connection,
# so each (forked) worker process gets its own connection.
_connection_cache = {}
_connection_lock = Lock()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Connection handling
# ------------------------------------------------------------
def get_db():
    """Return the cached SQLite connection for this process, creating it lazily.

    Connections are cached per PID so forked workers never share a handle.
    check_same_thread=False allows use from multiple threads; callers must
    serialize writes themselves.
    """
    pid = os.getpid()

    if pid not in _connection_cache:
        with _connection_lock:
            # Re-check under the lock (double-checked locking): without this,
            # two threads racing past the outer check would each open a
            # connection and one of them would be silently leaked.
            if pid not in _connection_cache:
                conn = sqlite3.connect(DB_PATH, check_same_thread=False)
                conn.row_factory = sqlite3.Row
                enable_wal_mode(conn)
                _connection_cache[pid] = conn

    return _connection_cache[pid]
|
||||
|
||||
|
||||
def enable_wal_mode(conn):
    """Put *conn*'s database into WAL journal mode with relaxed sync.

    Bug fix: the original body executed ``PRAGMA journal_mode=DELETE`` —
    the opposite of what the function name promises. WAL allows concurrent
    readers alongside a single writer, which matters here because other
    modules (db/state_sql.py) open their own connections to the same file.
    """
    conn.execute("PRAGMA journal_mode=WAL;")
    # NORMAL sync is the usual WAL pairing: durable enough, much faster.
    conn.execute("PRAGMA synchronous=NORMAL;")
    conn.commit()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Schema creation + SAFE schema upgrades
# ------------------------------------------------------------
def init_db():
    """Create the books table if needed and apply additive schema upgrades.

    Safe to call at every startup: the base table uses CREATE TABLE IF NOT
    EXISTS, and each upgrade is an ALTER TABLE ADD COLUMN whose
    "duplicate column" failure is treated as "already upgraded".
    """
    conn = get_db()

    # --------------------------------------------------------
    # BASE SCHEMA — book_idx is now PRIMARY KEY
    # --------------------------------------------------------
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS books (
            book_idx INTEGER PRIMARY KEY,
            title TEXT,
            author TEXT,
            description TEXT,
            cover_url TEXT,
            cover_path TEXT,
            book_url TEXT,

            chapters_total INTEGER,

            status TEXT,
            downloaded INTEGER DEFAULT 0,
            parsed INTEGER DEFAULT 0,
            audio_done INTEGER DEFAULT 0,

            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            processdate DATETIME,
            last_update DATETIME
        );
        """
    )
    conn.commit()

    # --------------------------------------------------------
    # SCHEMA UPGRADE UTILITY
    # --------------------------------------------------------
    def add_column(name, type_):
        # ADD COLUMN raises OperationalError when the column already exists;
        # that is the expected, harmless signal that no upgrade is needed.
        # (Was a bare `except:`, which also hid real errors such as a locked
        # or corrupt database file.)
        try:
            conn.execute(f"ALTER TABLE books ADD COLUMN {name} {type_};")
        except sqlite3.OperationalError:
            pass  # column already exists

    # --------------------------------------------------------
    # UPGRADE NEW FIELDS — future-proof, matched with Redis state model
    # --------------------------------------------------------

    # (book_idx already exists as PRIMARY KEY — no need to add)

    add_column("description", "TEXT")
    add_column("cover_path", "TEXT")
    add_column("book_url", "TEXT")

    # Download counters
    add_column("chapters_download_done", "INTEGER DEFAULT 0")
    add_column("chapters_download_skipped", "INTEGER DEFAULT 0")

    # Audio counters
    add_column("audio_skipped", "INTEGER DEFAULT 0")

    # Optional future fields
    add_column("audio_total", "INTEGER DEFAULT 0")

    conn.commit()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# WRITE OPERATIONS (book_idx-based UPSERT)
# ------------------------------------------------------------
def upsert_book(book_idx, **fields):
    """
    UPSERT by book_idx.
    Replaces old upsert that used book_id.

    Bug fix: with no keyword fields the original produced
    ``DO UPDATE SET , last_update = ...`` — a SQL syntax error. last_update
    is now appended to the assignment list, so a bare ``upsert_book(idx)``
    simply refreshes the timestamp.

    NOTE(review): column names come from the **fields keyword names and are
    interpolated into the SQL text — callers must pass trusted keys only.
    """
    conn = get_db()

    keys = ["book_idx"] + list(fields.keys())
    values = [book_idx] + list(fields.values())
    placeholders = ",".join(["?"] * len(values))

    # "excluded" refers to the row that failed to INSERT (SQLite upsert).
    assignments = [f"{k} = excluded.{k}" for k in fields]
    assignments.append("last_update = CURRENT_TIMESTAMP")
    updates = ", ".join(assignments)

    sql = f"""
    INSERT INTO books ({','.join(keys)})
    VALUES ({placeholders})
    ON CONFLICT(book_idx)
    DO UPDATE SET {updates};
    """

    conn.execute(sql, values)
    conn.commit()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# RAW READ OPERATIONS
# ------------------------------------------------------------
def _raw_get_book(book_idx):
    """Return the books row for *book_idx* as a plain dict, or None if absent."""
    conn = get_db()
    row = conn.execute(
        "SELECT * FROM books WHERE book_idx = ?;", (book_idx,)
    ).fetchone()
    # row is a sqlite3.Row (row_factory set in get_db) — convert for callers.
    return dict(row) if row else None
|
||||
|
||||
|
||||
def _raw_get_all_books():
    """Return every books row as a dict, newest first (by created_at)."""
    conn = get_db()
    cur = conn.execute("SELECT * FROM books ORDER BY created_at DESC;")
    return [dict(row) for row in cur.fetchall()]
|
||||
@ -0,0 +1,320 @@
|
||||
# ============================================================
# File: db/repository.py
# Purpose:
#   Unified façade for BookScraper database state.
#
# Responsibilities:
#   - Route metadata → SQLite
#   - Route counters → Redis (live) + SQLite (snapshot)
#   - Provide a clean API for tasks and Flask UI
# ============================================================
# ============================================================
# UPDATED — canonical read model via get_book_state
# ============================================================

from scraper.logger_decorators import logcall
from logbus.publisher import log

import redis
import os

# ============================================================
# SQL low-level engines (snapshot storage)
# ============================================================
from db.state_sql import (
    sql_fetch_book,
    sql_fetch_all_books,
    sql_set_status,
    sql_set_chapters_total,
    sql_register_book,
    sql_update_book,
)

# ============================================================
# REDIS low-level engines (live counters)
# ============================================================
from db.state_redis import (
    redis_set_status,
    redis_set_chapters_total,
    redis_inc_download_done,
    redis_inc_download_skipped,
    redis_inc_parsed_done,
    redis_inc_audio_done,
    redis_inc_audio_skipped,
)

# ============================================================
# Redis client (read-only for legacy + guards)
# ============================================================
# Module-level client shared by all reads in this file.
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
|
||||
# ============================================================
# LEGACY PROGRESS (UI only, unchanged)
# ============================================================
def _legacy_get_progress(book_idx):
    """Read the old per-key progress counters straight from Redis.

    Missing keys count as 0, so this is safe to call for unknown books.
    """
    return {
        "book_idx": book_idx,
        "total": int(_r.get(f"progress:{book_idx}:total") or 0),
        "completed": int(_r.get(f"progress:{book_idx}:completed") or 0),
        "skipped": int(_r.get(f"progress:{book_idx}:skipped") or 0),
        "failed": int(_r.get(f"progress:{book_idx}:failed") or 0),
        # EXISTS returns the number of keys found; == 1 means the abort flag is set.
        "abort": _r.exists(f"abort:{book_idx}") == 1,
        "failed_list": _r.lrange(f"progress:{book_idx}:failed_list", 0, -1),
    }
|
||||
|
||||
|
||||
@logcall
def get_progress(book_idx):
    """Legacy per-book progress for the UI (direct Redis reads)."""
    return _legacy_get_progress(book_idx)


# ============================================================
# FETCH (SQLite snapshot)
# ============================================================
@logcall
def fetch_book(book_idx):
    """SQLite snapshot row for one book — no Redis merge (see get_book_state)."""
    return sql_fetch_book(book_idx)


@logcall
def fetch_all_books():
    """All SQLite snapshot rows, newest first."""
    return sql_fetch_all_books()
|
||||
|
||||
|
||||
# ============================================================
# INIT / UPDATE METADATA
# ============================================================
@logcall
def register_book(
    book_idx,
    title,
    author=None,
    description=None,
    cover_url=None,
    cover_path=None,
    book_url=None,
):
    """Create (or overwrite) the SQLite snapshot record for a new book.

    Counters start at 0 chapters and status 'registered'; live Redis state is
    not touched here.
    """
    sql_register_book(
        book_idx,
        {
            "book_idx": book_idx,
            "title": title,
            "author": author,
            "description": description,
            "cover_url": cover_url,
            "cover_path": cover_path,
            "book_url": book_url,
            "chapters_total": 0,
            "status": "registered",
        },
    )
|
||||
|
||||
|
||||
@logcall
def update_book_after_full_scrape(
    book_idx,
    title=None,
    author=None,
    description=None,
    cover_url=None,
    chapters_total=None,
):
    """Persist scraped metadata to the SQLite snapshot.

    Only arguments that were actually supplied (non-None) are written;
    the book's status is always bumped to 'active'.
    """
    candidates = {
        "title": title,
        "author": author,
        "description": description,
        "cover_url": cover_url,
        "chapters_total": chapters_total,
    }
    # Keep only the fields the caller provided.
    fields = {column: value for column, value in candidates.items() if value is not None}
    fields["status"] = "active"

    sql_update_book(book_idx, fields)
|
||||
|
||||
|
||||
# ============================================================
# STATUS
# ============================================================
@logcall
def set_status(book_idx, status):
    """Write *status* to both stores: Redis (live) and SQLite (snapshot)."""
    redis_set_status(book_idx, status)
    sql_set_status(book_idx, status)


# ============================================================
# TOTALS
# ============================================================
@logcall
def set_chapters_total(book_idx, total):
    """Record the chapter total in both Redis and SQLite."""
    redis_set_chapters_total(book_idx, total)
    sql_set_chapters_total(book_idx, total)
|
||||
|
||||
|
||||
# ============================================================
# COUNTERS — WRITE ONLY
# ============================================================
# Counter increments go to Redis only; the SQLite snapshot is reconciled
# elsewhere (reads merge both sides in get_book_state).
@logcall
def inc_download_done(book_idx, amount=1):
    """Bump the live 'chapters downloaded' counter."""
    redis_inc_download_done(book_idx, amount)


@logcall
def inc_download_skipped(book_idx, amount=1):
    """Bump the live 'chapters skipped during download' counter."""
    redis_inc_download_skipped(book_idx, amount)


@logcall
def inc_parsed_done(book_idx, amount=1):
    """Bump the live 'chapters parsed' counter."""
    redis_inc_parsed_done(book_idx, amount)


@logcall
def inc_audio_done(book_idx, amount=1):
    """Bump the live 'audio rendered' counter."""
    redis_inc_audio_done(book_idx, amount)


@logcall
def inc_audio_skipped(book_idx, amount=1):
    """Bump the live 'audio skipped' counter."""
    redis_inc_audio_skipped(book_idx, amount)
|
||||
|
||||
|
||||
# ============================================================
# CANONICAL READ MODEL
# ============================================================
@logcall
def get_book_state(book_idx):
    """
    Canonical merged read model.

    Rules:
    - SQL = snapshot baseline
    - Redis = live counters
    - merged = max(sql, redis)
    - capped at chapters_total
    """

    sqlite_row = sql_fetch_book(book_idx) or {}
    redis_state = _r.hgetall(f"book:{book_idx}:state") or {}

    def _int(v):
        # Defensive parse: Redis values are strings, SQL values may be NULL.
        try:
            return int(v)
        except Exception:
            return 0

    chapters_total = _int(sqlite_row.get("chapters_total"))

    # SQL snapshot
    sql_downloaded = _int(sqlite_row.get("downloaded"))
    sql_audio_done = _int(sqlite_row.get("audio_done"))
    sql_audio_skipped = _int(sqlite_row.get("audio_skipped"))

    # Redis live — "downloaded" here means done + skipped (both count as
    # resolved for progress purposes).
    redis_downloaded = _int(redis_state.get("chapters_download_done")) + _int(
        redis_state.get("chapters_download_skipped")
    )
    redis_audio_done = _int(redis_state.get("audio_done"))
    redis_audio_skipped = _int(redis_state.get("audio_skipped"))

    # Merge: take the larger of snapshot vs live (snapshot may lag behind,
    # live may have been flushed — max() tolerates either direction).
    merged_downloaded = max(sql_downloaded, redis_downloaded)
    merged_audio_done = max(sql_audio_done, redis_audio_done)
    merged_audio_skipped = max(sql_audio_skipped, redis_audio_skipped)

    # Never report more progress than there are chapters.
    if chapters_total > 0:
        merged_downloaded = min(merged_downloaded, chapters_total)
        merged_audio_done = min(merged_audio_done, chapters_total)
        merged_audio_skipped = min(merged_audio_skipped, chapters_total)

    audio_completed = merged_audio_done + merged_audio_skipped

    # Build state: start from the SQL row, overwrite with merged counters.
    state = dict(sqlite_row)
    state.update(
        {
            "downloaded": merged_downloaded,
            "audio_done": merged_audio_done,
            "audio_skipped": merged_audio_skipped,
            "chapters_total": chapters_total,
        }
    )

    # Derived status: counters override the stored status once totals are known.
    status = sqlite_row.get("status") or "unknown"
    if chapters_total > 0:
        if merged_downloaded < chapters_total:
            status = "downloading"
        elif merged_downloaded == chapters_total and audio_completed < chapters_total:
            status = "audio"
        elif audio_completed >= chapters_total:
            status = "done"

    state["status"] = status
    return state
|
||||
|
||||
|
||||
# ============================================================
# READ HELPERS (VIA get_book_state ONLY)
# ============================================================
@logcall
def get_chapters_total(book_idx):
    """Merged chapter total for a book (0 when unknown)."""
    return int(get_book_state(book_idx).get("chapters_total", 0))


@logcall
def get_audio_done(book_idx):
    """Merged count of chapters whose audio is rendered."""
    return int(get_book_state(book_idx).get("audio_done", 0))


@logcall
def get_audio_completed_total(book_idx):
    """Audio progress including skips: rendered + skipped chapters."""
    state = get_book_state(book_idx)
    return int(state.get("audio_done", 0)) + int(state.get("audio_skipped", 0))
|
||||
|
||||
|
||||
# ============================================================
# STATUSCHECK GUARD (INTENTIONAL DIRECT REDIS)
# ============================================================
@logcall
def try_trigger_statuscheck(book_idx):
    """Atomically claim the one-shot statuscheck trigger for a book.

    SET NX returns True only for the first caller; later callers get False.
    NOTE(review): the flag has no TTL, so it persists until deleted elsewhere
    — confirm that is intended.
    """
    return bool(_r.set(f"book:{book_idx}:statuscheck:triggered", "1", nx=True))
|
||||
|
||||
|
||||
# ============================================================
# ACTIVE / REGISTERED BOOK LISTS (UI API)
# ============================================================
@logcall
def get_registered_books():
    """
    Books visible in the 'registered' list in the UI.
    """
    hidden_states = {"hidden"}
    return [
        book
        for book in sql_fetch_all_books()
        if book.get("status") not in hidden_states
    ]
|
||||
|
||||
|
||||
@logcall
def get_active_books():
    """
    Books currently active in the dashboard.
    """
    # Finished books are hidden here too, unlike get_registered_books.
    hidden_states = {"hidden", "done"}
    return [
        book
        for book in sql_fetch_all_books()
        if book.get("status") not in hidden_states
    ]
|
||||
|
||||
|
||||
@logcall
def store_m4b_error(book_idx: str, volume: str, error_text: str):
    """
    Passive storage of m4b errors.
    No logic, no retries, no state transitions.
    """
    # Appended to a Redis list so multiple errors per book are preserved
    # in arrival order.
    key = f"book:{book_idx}:m4b:errors"
    entry = f"{volume}: {error_text}"

    _r.rpush(key, entry)
|
||||
@ -0,0 +1,130 @@
|
||||
# ============================================================
# File: db/state_redis.py (UPDATED for book_idx-only architecture)
# Purpose:
#   Low-level Redis counters/state for BookScraper.
#   Used ONLY by db.repository façade.
# ============================================================

import os
import time
import redis

from logbus.publisher import log

# Shared module-level client; decode_responses so hash values come back as str.
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# INTERNAL KEY BUILDER
# ------------------------------------------------------------
def _key(book_idx: str) -> str:
    """Redis hash key holding all live state for one book."""
    return f"book:{book_idx}:state"
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# STATUS
# ------------------------------------------------------------
def redis_set_status(book_idx: str, status: str):
    """Set the live status field and touch last_update (unix seconds)."""
    log(f"[DB-REDIS] Setting status for {book_idx} to {status}")
    key = _key(book_idx)
    r.hset(key, "status", status)
    r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# TOTAL CHAPTERS
# ------------------------------------------------------------
def redis_set_chapters_total(book_idx: str, total: int):
    """Set the live chapter total and touch last_update."""
    key = _key(book_idx)
    r.hset(key, "chapters_total", total)
    r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# DOWNLOAD COUNTERS
# ------------------------------------------------------------
def redis_inc_download_done(book_idx: str, amount: int = 1):
    """Atomically increment the live downloaded-chapters counter."""
    log(f"[DB-REDIS] Incrementing download done for {book_idx} by {amount}")
    key = _key(book_idx)
    r.hincrby(key, "chapters_download_done", amount)
    # Record modification time alongside the counter.
    r.hset(key, "last_update", int(time.time()))


def redis_inc_download_skipped(book_idx: str, amount: int = 1):
    """Atomically increment the live skipped-during-download counter."""
    log(f"[DB-REDIS] Incrementing download skipped for {book_idx} by {amount}")
    key = _key(book_idx)
    r.hincrby(key, "chapters_download_skipped", amount)
    r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# PARSE COUNTERS
# ------------------------------------------------------------
def redis_inc_parsed_done(book_idx: str, amount: int = 1):
    """Atomically increment the live parsed-chapters counter."""
    log(f"[DB-REDIS] Incrementing parsed done for {book_idx} by {amount}")
    key = _key(book_idx)
    r.hincrby(key, "chapters_parsed_done", amount)
    r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# AUDIO COUNTERS
# ------------------------------------------------------------
def redis_inc_audio_done(book_idx: str, amount: int = 1):
    """Atomically increment the live audio-rendered counter."""
    log(f"[DB-REDIS] Incrementing audio done for {book_idx} by {amount}")
    key = _key(book_idx)
    r.hincrby(key, "audio_done", amount)
    r.hset(key, "last_update", int(time.time()))


def redis_inc_audio_skipped(book_idx: str, amount: int = 1):
    """Atomically increment the live audio-skipped counter."""
    log(f"[DB-REDIS] Incrementing audio skipped for {book_idx} by {amount}")
    key = _key(book_idx)
    r.hincrby(key, "audio_skipped", amount)
    r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# INITIALISE BOOK STATE
# ------------------------------------------------------------
def init_book_state(book_idx: str, title: str, url: str, chapters_total: int):
    """
    Initialise the complete Redis state for a new book.

    IMPORTANT:
    - If a key already exists → do NOT reset it (progress is preserved).
    - Only missing fields are added (HSETNX).
    """

    key = f"book:{book_idx}:state"

    # Already exists? Then we only fill in missing fields.
    exists = r.exists(key)

    # Pipeline batches all commands into one round trip.
    pipeline = r.pipeline()

    # Basic metadata
    # NOTE(review): stored under field name "book_id" while the rest of the
    # system is book_idx-based — confirm nothing reads "book_idx" from this hash.
    pipeline.hsetnx(key, "book_id", book_idx)
    pipeline.hsetnx(key, "title", title or "")
    pipeline.hsetnx(key, "url", url or "")

    # State
    pipeline.hsetnx(key, "status", "registered")

    # Counters
    pipeline.hsetnx(key, "chapters_total", chapters_total)
    pipeline.hsetnx(key, "chapters_download_done", 0)
    pipeline.hsetnx(key, "chapters_download_skipped", 0)
    pipeline.hsetnx(key, "chapters_parsed_done", 0)
    pipeline.hsetnx(key, "audio_done", 0)
    pipeline.hsetnx(key, "audio_skipped", 0)

    # Timestamp — unconditional HSET: always refreshed, even if state existed.
    pipeline.hset(key, "last_update", int(time.time()))

    pipeline.execute()

    if exists:
        log(f"[DB-REDIS] init_book_state(): UPDATED existing state for {book_idx}")
    else:
        log(f"[DB-REDIS] init_book_state(): CREATED new state for {book_idx}")
||||
@ -0,0 +1,178 @@
|
||||
# ============================================================
# File: db/state_sql.py (UPDATED for book_idx-only architecture)
# Purpose:
#   Low-level SQLite snapshot layer for BookScraper metadata.
#   Used ONLY through db.repository façade.
# ============================================================

import sqlite3
import os

from logbus.publisher import log

# Must match db/db.py
DB_PATH = os.getenv("BOOKSCRAPER_DB", "/app/data/books.db")
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# INTERNAL HELPERS
# ------------------------------------------------------------
def _connect():
    """Open a fresh short-lived connection (callers close it per operation)."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# FETCH
# ------------------------------------------------------------
def sql_fetch_book(book_idx):
    """Return the snapshot row for *book_idx* as a dict, or None if absent.

    Fix: the connection is now closed in a finally block — the original
    leaked it whenever execute/fetch raised.
    """
    conn = _connect()
    try:
        row = conn.execute(
            "SELECT * FROM books WHERE book_idx = ?", (book_idx,)
        ).fetchone()
    finally:
        conn.close()
    return dict(row) if row else None
|
||||
|
||||
|
||||
def sql_fetch_all_books():
    """Return all snapshot rows as dicts, newest first.

    Fix: connection closed via finally so errors no longer leak it.
    """
    conn = _connect()
    try:
        rows = conn.execute(
            "SELECT * FROM books ORDER BY created_at DESC"
        ).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# REGISTER / UPDATE
# ------------------------------------------------------------
def sql_register_book(book_idx, fields: dict):
    """
    Insert or replace entire book record.
    book_idx is the PRIMARY KEY.

    Fix: connection closed via finally so a failing INSERT no longer leaks it.
    NOTE(review): column names come from fields keys and are interpolated into
    the SQL — callers must pass trusted keys only.
    """
    conn = _connect()
    try:
        cols = ", ".join(["book_idx"] + list(fields.keys()))
        placeholders = ", ".join(["?"] * (1 + len(fields)))
        values = [book_idx] + list(fields.values())

        conn.execute(
            f"INSERT OR REPLACE INTO books ({cols}) VALUES ({placeholders})",
            values,
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sql_update_book(book_idx, fields: dict):
    """Update the given columns of one book; no-op when *fields* is empty.

    Fix: connection closed via finally so a failing UPDATE no longer leaks it.
    """
    if not fields:
        return

    conn = _connect()
    try:
        set_clause = ", ".join([f"{k} = ?" for k in fields])
        params = list(fields.values()) + [book_idx]

        conn.execute(
            f"UPDATE books SET {set_clause} WHERE book_idx = ?",
            params,
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# STATUS
# ------------------------------------------------------------
def sql_set_status(book_idx, status: str):
    """Write the snapshot status for one book.

    Fix: connection closed via finally (was leaked on error).
    """
    conn = _connect()
    try:
        conn.execute(
            "UPDATE books SET status = ? WHERE book_idx = ?",
            (status, book_idx),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# CHAPTER TOTAL (snapshot)
# ------------------------------------------------------------
def sql_set_chapters_total(book_idx, total: int):
    """Write the snapshot chapter total for one book.

    Fix: connection closed via finally (was leaked on error).
    """
    conn = _connect()
    try:
        conn.execute(
            "UPDATE books SET chapters_total = ? WHERE book_idx = ?",
            (total, book_idx),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# COUNTERS (SNAPSHOT-ONLY)
# ------------------------------------------------------------
def _sql_inc(column, book_idx, amount):
    """Add *amount* to one snapshot counter column (NULL treated as 0).

    Shared body for the four sql_inc_* helpers below (they were four
    copy-pasted blocks). The column name is supplied only by those helpers,
    never by external callers. Connection closed via finally — the originals
    leaked it whenever execute raised.
    """
    conn = _connect()
    try:
        conn.execute(
            f"""
            UPDATE books
            SET {column} = COALESCE({column},0) + ?
            WHERE book_idx = ?
            """,
            (amount, book_idx),
        )
        conn.commit()
    finally:
        conn.close()


def sql_inc_downloaded(book_idx, amount=1):
    """Increment the snapshot 'downloaded' counter."""
    _sql_inc("downloaded", book_idx, amount)


def sql_inc_parsed(book_idx, amount=1):
    """Increment the snapshot 'parsed' counter."""
    _sql_inc("parsed", book_idx, amount)


def sql_inc_audio_done(book_idx, amount=1):
    """Increment the snapshot 'audio_done' counter."""
    log(f"[DB-SQL] Incrementing audio_done for {book_idx} by {amount}")
    _sql_inc("audio_done", book_idx, amount)


def sql_inc_audio_skipped(book_idx, amount=1):
    """Increment the snapshot 'audio_skipped' counter."""
    log(f"[DB-SQL] Incrementing audio_skipped for {book_idx} by {amount}")
    _sql_inc("audio_skipped", book_idx, amount)
|
||||
@ -1,9 +1,15 @@
|
||||
FROM python:3.12-slim
WORKDIR /app

# Install audio worker dependencies
COPY requirements.audio.txt /app/requirements.audio.txt
RUN pip install --no-cache-dir -r /app/requirements.audio.txt

# Celery is required for the worker
RUN pip install --no-cache-dir celery

# Copy project
COPY . /app

# Start the AUDIO Celery worker.
# Fix: the file previously contained TWO CMD instructions; Docker honors only
# the last one, so the placeholder CMD ("print('audio worker ready')") was
# dead and has been removed.
CMD ["celery", "-A", "celery_app", "worker", "-Q", "audio", "-n", "audio@%h", "-l", "INFO"]
|
||||
|
||||
@ -0,0 +1,70 @@
|
||||
FROM debian:12

ENV DEBIAN_FRONTEND=noninteractive

# ----------------------------------------------------------
# System + PHP (PHP 8.2 native)
# ----------------------------------------------------------
RUN apt-get update && apt-get install -y \
    ffmpeg \
    curl \
    ca-certificates \
    bash \
    php-cli \
    php-intl \
    php-json \
    php-mbstring \
    php-xml \
    php-curl \
    php-zip \
    python3 \
    python3-pip \
    python3-venv \
    \
    # build deps for mp4v2
    git \
    build-essential \
    autoconf \
    automake \
    libtool \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# ----------------------------------------------------------
# Python venv (PEP 668 compliant)
# ----------------------------------------------------------
# Placing the venv first on PATH makes `pip`/`python3` below resolve to it.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:/usr/local/bin:$PATH"

# ----------------------------------------------------------
# Build & install mp4v2 (mp4info)
# ----------------------------------------------------------
WORKDIR /tmp

RUN git clone https://github.com/sandreas/mp4v2 \
    && cd mp4v2 \
    && ./configure \
    && make -j$(nproc) \
    && make install \
    && echo "/usr/local/lib" > /etc/ld.so.conf.d/mp4v2.conf \
    && ldconfig \
    && cd / \
    && rm -rf /tmp/mp4v2

# ----------------------------------------------------------
# Install m4b-tool
# ----------------------------------------------------------
RUN curl -L https://github.com/sandreas/m4b-tool/releases/latest/download/m4b-tool.phar \
    -o /usr/local/bin/m4b-tool \
    && chmod +x /usr/local/bin/m4b-tool

# ----------------------------------------------------------
# App
# ----------------------------------------------------------
WORKDIR /app
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

CMD ["bash"]
|
||||
@ -1,12 +1,80 @@
|
||||
# logbus/publisher.py
import redis

import logging
import os

REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
logger = logging.getLogger("logbus")

# WARNING level is used as the plain "always print" channel for log().
logger.setLevel(logging.WARNING)

# ============================================================
# FILE LOGGER — log.txt in BOOKSCRAPER_OUTPUT_DIR
# ============================================================
try:
    root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    os.makedirs(root, exist_ok=True)

    file_path = os.path.join(root, "log.txt")

    file_handler = logging.FileHandler(file_path, mode="a", encoding="utf-8")
    file_formatter = logging.Formatter("%(message)s")  # message passed through verbatim
    file_handler.setFormatter(file_formatter)

    logger.addHandler(file_handler)

except Exception:
    # File logging must never be able to crash the app.
    pass
|
||||
|
||||
|
||||
def log(message: str):
    """
    Dumb logger: forward non-empty messages verbatim — no prefixes,
    no mutation. Blank or whitespace-only messages are dropped.
    """
    if message is None or message.strip() == "":
        return

    # Console / file path (logger is configured at WARNING level).
    logger.warning(message)

    # Echo to the WebGUI buffer; imported lazily and guarded so a
    # missing or broken UI module can never break logging.
    try:
        from scraper.ui_log import push_ui

        push_ui(message)
    except Exception:
        pass
|
||||
|
||||
|
||||
# ============================================================
# Delta-based log retrieval using Redis indexes
# ============================================================


def get_ui_logs_delta(last_index: int):
    """
    Return (new_lines, total_count) for the UI log buffer in Redis.

    Only lines AFTER last_index are returned; a negative or
    out-of-range index returns the entire buffer.

    Example:
        last_index = 10 → returns logs with Redis indexes 11..end

    Fixes:
    - The original referenced `r` before the line that assigned it,
      raising UnboundLocalError on every call.
    - An obsolete duplicate `log()` definition was spliced into the
      middle of this function (diff-merge artifact); removed.
    """
    # NOTE(review): UI_LOG_KEY must be defined at module level — its
    # definition is not visible in this chunk; confirm it exists.
    r = redis.Redis.from_url(REDIS_URL)

    # Determine total lines in buffer
    total = r.llen(UI_LOG_KEY)
    if total == 0:
        return [], 0

    # First load OR index invalid → send entire buffer
    if last_index < 0 or last_index >= total:
        return r.lrange(UI_LOG_KEY, 0, -1), total

    # Only lines after last_index
    new_lines = r.lrange(UI_LOG_KEY, last_index + 1, -1)
    return new_lines, total
|
||||
|
||||
@ -0,0 +1 @@
|
||||
Subproject commit 480a73324f53d0d24bea4931c3902097f8e2a663
|
||||
@ -0,0 +1,208 @@
|
||||
# ============================================================
# File: scraper/progress.py
# Purpose: Track chapter counters for WebGUI progress +
#          Book State Model (Redis-backed).
# ============================================================

import os
import time
import redis

# Same Redis database Celery uses as broker (db 0).
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
# decode_responses=True → all values come back as str, not bytes.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# LEGACY PROGRESS FUNCTIONS (ONAANGEROERD BEHOUDEN)
|
||||
# ============================================================
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# SET TOTAL
# ------------------------------------------------------------
def set_total(book_id: str, total: int):
    """Store the legacy total-chapter count for a book."""
    key = f"progress:{book_id}:total"
    r.set(key, total)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# COUNTERS legacy
# ------------------------------------------------------------
def inc_completed(book_id: str):
    """Bump the legacy completed-chapter counter by one."""
    r.incr("progress:%s:completed" % book_id)


def inc_skipped(book_id: str):
    """Bump the legacy skipped-chapter counter by one."""
    r.incr("progress:%s:skipped" % book_id)


def inc_failed(book_id: str):
    """Bump the legacy failed-chapter counter by one."""
    r.incr("progress:%s:failed" % book_id)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# FAILED CHAPTER LIST
# ------------------------------------------------------------
def add_failed_chapter(book_id: str, chapter: int, reason: str):
    """Append a human-readable failure entry for the UI failure list."""
    r.rpush(f"progress:{book_id}:failed_list", f"Chapter {chapter}: {reason}")


def get_failed_list(book_id: str):
    """Return every failure entry recorded for this book."""
    key = f"progress:{book_id}:failed_list"
    return r.lrange(key, 0, -1)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# READ STRUCT FOR UI (legacy view)
# ------------------------------------------------------------
def get_progress(book_id: str):
    """Legacy progress snapshot consumed by the WebGUI."""

    def _count(field: str) -> int:
        # Missing keys read back as None → treat as zero.
        return int(r.get(f"progress:{book_id}:{field}") or 0)

    return {
        "book_id": book_id,
        "total": _count("total"),
        "completed": _count("completed"),
        "skipped": _count("skipped"),
        "failed": _count("failed"),
        "failed_list": get_failed_list(book_id),
        "abort": r.exists(f"abort:{book_id}") == 1,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# BOOK STATE MODEL (NIEUWE FUNCTIES — GEEN BREAKING CHANGES)
|
||||
# ============================================================
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Initialize book state at start of scrape
# ------------------------------------------------------------
def init_book_state(
    book_id: str, title: str = "", url: str = "", chapters_total: int = 0
):
    """
    Seed the Book State Model hash for a new scrape and register the
    book in the library set.

    Fix: the original seeded "chapters_done", but inc_chapter_done()
    increments "chapters_download_done" (which get_state() also reads),
    so that counter was never initialized. Likewise "audio_skipped" is
    read by get_state() and incremented by inc_audio_skipped() but was
    never seeded. Both now start at 0; "chapters_done" is kept for
    backward compatibility with any existing readers.
    """
    key = f"book:{book_id}:state"
    now = int(time.time())

    r.hset(
        key,
        mapping={
            "book_id": book_id,
            "title": title or "",
            "url": url or "",
            "status": "scraping",
            "chapters_total": chapters_total,
            "chapters_done": 0,
            "chapters_download_done": 0,
            "chapters_download_skipped": 0,
            "audio_total": 0,
            "audio_done": 0,
            "audio_skipped": 0,
            "last_update": now,
        },
    )

    # Track in library list
    r.sadd("books", book_id)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Status + timestamps
# ------------------------------------------------------------
def set_status(book_id: str, status: str):
    """Write the book status and refresh last_update."""
    state_key = f"book:{book_id}:state"
    r.hset(state_key, "status", status)
    r.hset(state_key, "last_update", int(time.time()))


def set_last_update(book_id: str):
    """Touch only the last_update timestamp."""
    state_key = f"book:{book_id}:state"
    r.hset(state_key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Chapter counters new model
# ------------------------------------------------------------
def set_chapter_total(book_id: str, total: int):
    """Record the total chapter count in the Book State Model."""
    r.hset(f"book:{book_id}:state", "chapters_total", total)
    set_last_update(book_id)


def inc_chapter_download_skipped(book_id: str):
    """Count one chapter whose download was skipped."""
    r.hincrby(f"book:{book_id}:state", "chapters_download_skipped", 1)
    set_last_update(book_id)


def inc_chapter_done(book_id: str):
    """Count one chapter whose download finished."""
    r.hincrby(f"book:{book_id}:state", "chapters_download_done", 1)
    set_last_update(book_id)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Audio counters
# ------------------------------------------------------------
def set_audio_total(book_id: str, total: int):
    """Record how many audio files this book should produce."""
    r.hset(f"book:{book_id}:state", "audio_total", total)
    set_last_update(book_id)


def inc_audio_done(book_id: str):
    """Count one finished audio conversion."""
    r.hincrby(f"book:{book_id}:state", "audio_done", 1)
    set_last_update(book_id)


def inc_audio_skipped(book_id: str):
    """Count one skipped audio conversion."""
    r.hincrby(f"book:{book_id}:state", "audio_skipped", 1)
    set_last_update(book_id)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Skip reasons
# ------------------------------------------------------------
def save_skip_reason(book_id: str, chapter: int, reason: str):
    """Store an explicit per-chapter skip reason for UI transparency."""
    r.hset(f"book:{book_id}:skip_reasons", chapter, reason)
    set_last_update(book_id)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Full state readout
# ------------------------------------------------------------
def get_state(book_id: str):
    """
    Read the Book State Model hash plus legacy progress — merged into
    one dict but kept under separate keys.
    """
    state = r.hgetall(f"book:{book_id}:state") or {}

    # Counters come back as strings; convert to int where possible.
    for counter in (
        "chapters_total",
        "chapters_download_done",
        "chapters_download_skipped",
        "audio_total",
        "audio_skipped",
        "audio_done",
    ):
        if counter not in state:
            continue
        try:
            state[counter] = int(state[counter])
        except ValueError:
            # Leave the raw value in place rather than crash the UI.
            pass

    # Skip reasons
    state["skip_reasons"] = r.hgetall(f"book:{book_id}:skip_reasons") or {}

    # Attach legacy progress separately
    state["legacy_progress"] = get_progress(book_id)

    return state
|
||||
@ -0,0 +1,124 @@
|
||||
import os
|
||||
import redis
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
from scraper.ui_log import push_ui
|
||||
|
||||
# ---------------------------------------------------------
# Default Redis connection (Docker workers)
# ---------------------------------------------------------
# NOTE(review): this module reads REDIS_URL, while logbus/progress
# read REDIS_BROKER — confirm both env vars are set in deployment.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Debug mode (optional): enabled by default, set ABORT_DEBUG=0 to silence.
ABORT_DEBUG = os.getenv("ABORT_DEBUG", "1") == "1"

# Avoid duplicate spam: keys whose "first check" was already logged.
_seen_debug_keys = set()
|
||||
|
||||
|
||||
# =========================================================
# INTERNAL DEBUGGING
# =========================================================
def _debug(msg: str):
    # Mirror every debug line to stdout AND the WebGUI log buffer.
    print(msg)
    push_ui(msg)
|
||||
|
||||
|
||||
# =========================================================
# ABORT FLAG — unified book_idx
# =========================================================


def set_abort(book_idx: str):
    """Raise the abort flag for this book_idx."""
    flag = f"abort:{book_idx}"
    r.set(flag, "1")
    if ABORT_DEBUG:
        _debug(f"[ABORT] SET {flag}")
|
||||
|
||||
|
||||
def clear_abort(book_idx: str):
    """Drop the abort flag for this book_idx."""
    flag = f"abort:{book_idx}"
    r.delete(flag)
    if ABORT_DEBUG:
        _debug(f"[ABORT] CLEAR {flag}")
|
||||
|
||||
|
||||
def abort_requested(book_idx: str, redis_client=None) -> bool:
    """
    Check whether abort flag is active for book_idx.

    redis_client:
        - Docker workers → None → use default Redis (r)
        - Local macOS audio worker → passes Redis(host=127.0.0.1)

    Returns False on any Redis error (fail-open: work continues).
    """
    client = redis_client or r
    key = f"abort:{book_idx}"

    try:
        exists = client.exists(key)

        if ABORT_DEBUG:

            # Log only once per book
            if key not in _seen_debug_keys:
                try:
                    # Pull host/port/db from the client's pool so the
                    # debug line shows WHICH Redis is being checked.
                    conn = client.connection_pool.connection_kwargs
                    host = conn.get("host")
                    port = conn.get("port")
                    db = conn.get("db")
                    _debug(
                        # NOTE(review): the prefix line below was
                        # commented out, so this message starts with
                        # "redis=" only — confirm that is intentional.
                        # f"[ABORT_DEBUG] first check book_idx={book_idx} "
                        f"redis={host}:{port} db={db}"
                    )
                except Exception:
                    _debug(f"[ABORT_DEBUG] first check book_idx={book_idx}")
                _seen_debug_keys.add(key)

        # Log ACTIVE state
        if exists == 1:
            _debug(f"[ABORT] ACTIVE for {book_idx}")

        return exists == 1

    except Exception as e:
        if ABORT_DEBUG:
            _debug(f"[ABORT_DEBUG] ERROR checking {key}: {e}")
        return False
|
||||
|
||||
|
||||
# =========================================================
# PER-CHAPTER STATE — unified book_idx
# =========================================================


def mark_chapter_started(book_idx: str, chapter_num: int):
    """Persist a 'started' marker for this chapter."""
    r.set(f"started:{book_idx}:{chapter_num}", "1")


def chapter_started(book_idx: str, chapter_num: int) -> bool:
    """True if this chapter was ever marked as started."""
    return r.exists(f"started:{book_idx}:{chapter_num}") == 1
|
||||
|
||||
|
||||
# =========================================================
# RESET STATE FOR BOOK_IDX
# =========================================================


def reset_book_state(book_idx: str):
    """
    Remove the abort flag and every per-chapter 'started' marker.
    """
    # abort flag
    r.delete(f"abort:{book_idx}")

    # per-chapter markers (SCAN keeps this non-blocking on big DBs)
    for marker_key in r.scan_iter(f"started:{book_idx}:*"):
        r.delete(marker_key)
||||
@ -1,72 +1,176 @@
|
||||
# scraper/download_controller.py
|
||||
# =========================================================
|
||||
# File: scraper/download_controller.py
|
||||
# Purpose:
|
||||
# Build Celery pipelines for all chapters using book_idx
|
||||
# Handles:
|
||||
# • volume assignment
|
||||
# • cover download + replication
|
||||
# • script generation
|
||||
# • Redis Book State Model init
|
||||
# • abort tracking
|
||||
# =========================================================
|
||||
|
||||
from celery import group
|
||||
from scraper.tasks.pipeline import build_chapter_pipeline
|
||||
|
||||
# ❗ IMPORTANT:
|
||||
# generate_all_scripts MUST NOT import DownloadController, otherwise circular import.
|
||||
# We keep the import, but scriptgen must be clean.
|
||||
from scraper import scriptgen
|
||||
|
||||
from logbus.publisher import log
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
|
||||
from scraper.abort import abort_requested
|
||||
from db.state_redis import init_book_state
|
||||
from db.repository import set_status, set_chapters_total
|
||||
|
||||
|
||||
class DownloadController:
    """
    Coordinates all chapter pipelines (download → parse → save).

    Handles:
      • volume assignment (MAX_VOL_SIZE chapters per Volume_NNN dir)
      • cover download + replication into each volume + static copy
      • script generation (via scraper.scriptgen)
      • Redis Book State Model init + status updates

    Fixes over the merged original:
      • two spliced __init__ definitions collapsed into the
        book_idx-aware one
      • start() built the task list twice and called
        group(tasks).apply_async() TWICE, enqueuing every chapter
        pipeline two times — the group is now launched exactly once.
    """

    def __init__(self, book_idx: str, scrape_result: dict):
        """
        book_idx      : canonical book identifier used for Redis keys
        scrape_result : dict from the scraping engine (title, author,
                        description, cover_url, book_url, chapters[])
        """
        self.book_idx = str(book_idx)
        self.scrape_result = scrape_result

        # Metadata
        self.title = scrape_result.get("title", "UnknownBook")
        self.chapters = scrape_result.get("chapters", []) or []
        self.cover_url = scrape_result.get("cover_url")

        # Output folder root from .env
        root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")

        # Chapters per volume
        self.max_vol = int(os.getenv("MAX_VOL_SIZE", "200"))

        # Base directory for the whole book
        self.book_base = os.path.join(root, self.title)
        os.makedirs(self.book_base, exist_ok=True)

        # Constant metadata passed downstream to every chapter task
        self.meta = {
            "title": self.title,
            "author": scrape_result.get("author"),
            "description": scrape_result.get("description"),
            "book_url": scrape_result.get("book_url"),
        }

        log(f"[CTRL_DEBUG] Controller init book_idx={self.book_idx}")

        # Init Redis Book State Model — best effort, never fatal
        try:
            init_book_state(
                book_id=self.book_idx,
                title=self.title,
                url=self.meta["book_url"],
                chapters_total=len(self.chapters),
            )
        except Exception as e:
            log(f"[CTRL_STATE] init_book_state FAILED: {e}")

    # ---------------------------------------------------------
    def download_cover(self):
        """Download the cover image into <book_base>/cover.jpg."""
        if not self.cover_url:
            return log(f"[CTRL] No cover URL for '{self.title}'")

        cover_path = os.path.join(self.book_base, "cover.jpg")

        # Some hosts reject non-browser UAs / missing referer.
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": self.scrape_result.get("book_url") or "",
        }

        try:
            log(f"[CTRL] Downloading cover: {self.cover_url}")
            resp = requests.get(self.cover_url, timeout=10, headers=headers)
            resp.raise_for_status()

            with open(cover_path, "wb") as f:
                f.write(resp.content)

            log(f"[CTRL] Cover saved: {cover_path}")
        except Exception as e:
            log(f"[CTRL] Cover download failed: {e}")

    # ---------------------------------------------------------
    def replicate_cover_to_volumes(self):
        """Copy cover.jpg into every Volume_* subdirectory."""
        src = os.path.join(self.book_base, "cover.jpg")
        if not os.path.exists(src):
            return

        for entry in os.listdir(self.book_base):
            if entry.lower().startswith("volume_"):
                dst = os.path.join(self.book_base, entry, "cover.jpg")
                try:
                    shutil.copyfile(src, dst)
                    log(f"[CTRL] Cover replicated → {dst}")
                except Exception as e:
                    log(f"[CTRL] Cover replication failed: {e}")

    # ---------------------------------------------------------
    def store_cover_in_static(self):
        """Copy the cover to static/covers/<book_idx>.jpg for the WebGUI."""
        src = os.path.join(self.book_base, "cover.jpg")
        if not os.path.exists(src):
            return

        os.makedirs("static/covers", exist_ok=True)
        dst = os.path.join("static/covers", f"{self.book_idx}.jpg")

        try:
            shutil.copyfile(src, dst)
            log(f"[CTRL] Cover stored for UI: {dst}")
        except Exception as e:
            log(f"[CTRL] Failed storing cover: {e}")

    # ---------------------------------------------------------
    def get_volume_path(self, chapter_num: int) -> str:
        """Return (and create) the volume directory for a chapter number."""
        vol_index = (chapter_num - 1) // self.max_vol + 1
        vol_name = f"Volume_{vol_index:03d}"
        vol_path = os.path.join(self.book_base, vol_name)
        os.makedirs(vol_path, exist_ok=True)
        return vol_path

    # ---------------------------------------------------------
    def start(self):
        """
        Launch one Celery pipeline per chapter; return the GroupResult.
        """
        total = len(self.chapters)
        log(f"[CTRL] Starting pipeline for '{self.title}' ({total} chapters)")

        # Update Redis/SQLite state — best effort
        try:
            set_status(self.book_idx, "downloading")
            set_chapters_total(self.book_idx, total)
        except Exception as e:
            log(f"[CTRL_STATE] Unable to set state: {e}")

        # Download cover first so it can be replicated below
        self.download_cover()

        # Build pipeline tasks — one per chapter.
        # get_volume_path() also creates the Volume_* directories,
        # so replication below finds them.
        tasks = []
        for ch in self.chapters:
            num = ch["num"]
            chapter_info = {
                "num": num,
                "url": ch["url"],
                "title": ch.get("title"),
                "volume_path": self.get_volume_path(num),
            }
            tasks.append(
                build_chapter_pipeline(self.book_idx, chapter_info, self.meta)
            )

        # Replicate cover + place in static
        self.replicate_cover_to_volumes()
        self.store_cover_in_static()

        # Generate scripts (scriptgen must not import this module —
        # circular import)
        try:
            scriptgen.generate_all_scripts(
                self.book_base, self.title, self.meta["author"]
            )
            log("[CTRL] Scripts generated")
        except Exception as e:
            log(f"[CTRL] Script generation failed: {e}")

        # Launch all chapter pipelines in parallel — exactly once
        async_result = group(tasks).apply_async()

        log("[CTRL] Pipelines launched.")
        return async_result
||||
|
||||
@ -0,0 +1,27 @@
|
||||
# ============================================================
|
||||
# File: scraper/engine/fetcher.py
|
||||
# Purpose:
|
||||
# Low-level HTML fetch utility shared by all site scrapers.
|
||||
# Replaces scattered _fetch() logic inside BookScraper.
|
||||
# ============================================================
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# Desktop-Firefox user agent shared by all site scrapers so requests
# look like a normal browser instead of python-requests.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:118.0) "
        "Gecko/20100101 Firefox/118.0"
    )
}
|
||||
|
||||
|
||||
def fetch_html(url: str, encoding: str = "utf-8", timeout: int = 10) -> BeautifulSoup:
    """
    Fetch HTML with a consistent user-agent and forced encoding.

    Raises requests.HTTPError on 4xx/5xx responses so callers do not
    silently parse an error page as if it were book content (the
    original returned soup for any status code).

    Returns BeautifulSoup(lxml).
    """
    resp = requests.get(url, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()  # fail fast on HTTP errors
    resp.encoding = encoding
    return BeautifulSoup(resp.text, "lxml")
|
||||
@ -0,0 +1,65 @@
|
||||
# ============================================================
|
||||
# File: scraper/engine/parser.py
|
||||
# Purpose:
|
||||
# High-level scraping API coordinating metadata extraction
|
||||
# and chapter extraction using pluggable SiteScraper classes.
|
||||
#
|
||||
# This is the new central engine:
|
||||
# - extract_metadata_only() used by INIT flow
|
||||
# - extract_metadata_full() used by full scraping pipeline
|
||||
# ============================================================
|
||||
|
||||
from scraper.engine.fetcher import fetch_html
|
||||
|
||||
|
||||
def extract_metadata_only(url: str, site_scraper):
    """
    Extract ONLY lightweight metadata (no chapter list):
    title / author / description / cover_url, with chapters_total = 0.
    """
    soup = fetch_html(url, site_scraper.encoding)

    # Delegated to the pluggable site scraper; dict literal keeps the
    # original parse order (title → author → description → cover).
    return {
        "title": site_scraper.parse_title(soup),
        "author": site_scraper.parse_author(soup),
        "description": site_scraper.parse_description(soup),
        "cover_url": site_scraper.parse_cover(soup, url),
        "chapters_total": 0,
        "book_url": url,
    }
|
||||
|
||||
|
||||
def extract_metadata_full(url: str, site_scraper):
    """
    Full scraping (metadata + chapterlist).
    Used by the scraping Celery pipeline.
    """
    # Landing-page soup — used below to locate the chapter-list page.
    soup = fetch_html(url, site_scraper.encoding)

    # metadata
    # NOTE(review): extract_metadata_only() fetches `url` a second
    # time — two network round-trips per book. Consider passing the
    # soup through if this becomes a bottleneck.
    meta = extract_metadata_only(url, site_scraper)

    # chapter list
    chapter_page_url = site_scraper.extract_chapter_page_url(soup)
    chapter_page_soup = fetch_html(chapter_page_url, site_scraper.encoding)
    chapters = site_scraper.parse_chapter_list(chapter_page_soup)

    meta["chapters"] = chapters
    return meta
|
||||
|
||||
|
||||
def build_book_id(title: str) -> str:
    """
    Canonical book_id generator.

    SCRAPE currently uses the raw title as the ID — keep that behavior
    so existing Redis keys stay valid.
    """
    return title
|
||||
@ -0,0 +1,33 @@
|
||||
# ============================================================
|
||||
# File: scraper/logger_decorators.py
|
||||
# Purpose: Function-call logging decorator
|
||||
# ============================================================
|
||||
|
||||
from functools import wraps
|
||||
from scraper.logger import log_debug
|
||||
|
||||
|
||||
def logcall(func):
    """
    Decorator: log the qualified function name and positional args on
    every call, then run the wrapped function unchanged and return its
    result.

    Usage: @logcall above any function.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # One line before execution; kwargs intentionally omitted to
        # keep the log terse.
        log_debug(f"[CALL] {func.__qualname__} args={args}")
        return func(*args, **kwargs)

    return wrapper
|
||||
@ -0,0 +1,27 @@
|
||||
#scraper/replacements/html.txt
|
||||
<br>=\n
|
||||
<br/>=\n
|
||||
<br />=\n
|
||||
=
|
||||
  =
|
||||
   =
|
||||
 =
|
||||
 =
|
||||
 =
|
||||
“="
|
||||
”="
|
||||
‘='
|
||||
’='
|
||||
<=<
|
||||
>=>
|
||||
©=
|
||||
®=
|
||||
™=
|
||||
fontbigbigbig=
|
||||
fontbigbig=
|
||||
font1=
|
||||
font2=
|
||||
font3=
|
||||
strongstrong=
|
||||
divdiv=
|
||||
spanspan=
|
||||
@ -0,0 +1,147 @@
|
||||
# scraper/scriptgen.py
|
||||
# Generates scripts (allinone.txt, makebook.txt, say.txt)
|
||||
# using external templates + dynamic merge generation.
|
||||
|
||||
import os
|
||||
import stat
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Load a template file from scraper/templates/
# ------------------------------------------------------------
def load_template(name: str) -> str:
    """Read a template from scraper/templates/; "" if it is missing."""
    template_path = os.path.join(TEMPLATE_DIR, name)
    if not os.path.exists(template_path):
        log(f"[SCRIPTGEN] Template missing: {template_path}")
        return ""
    with open(template_path, "r", encoding="utf-8") as handle:
        return handle.read()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Detect volumes (Volume_001, Volume_002, ...)
# ------------------------------------------------------------
def detect_volumes(book_base: str):
    """
    Return sorted [(number, dirname)] for Volume_* subdirectories of
    book_base. Entries whose numeric suffix does not parse are skipped.
    """
    found = []
    for entry in os.listdir(book_base):
        full = os.path.join(book_base, entry)
        if not os.path.isdir(full):
            continue
        if not entry.lower().startswith("volume_"):
            continue
        try:
            found.append((int(entry.split("_")[1]), entry))
        except Exception:
            continue
    return sorted(found)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Build the dynamic merge block
# ------------------------------------------------------------
def build_merge_block(title: str, author: str, volumes):
    """
    Build the chained m4b-tool merge command block — one merge command
    per (number, dirname) volume tuple, joined with ' \\\n&& '.
    Returns "" when there are no volumes.

    Volume numbers are zero-padded: width 2 normally, width 3 once
    there are 100+ volumes.
    """
    # Defensive normalization of caller-supplied strings.
    title = (title or "").strip()
    author = (author or "").strip()

    pad = 3 if len(volumes) >= 100 else 2

    commands = []
    for num, dirname in volumes:
        stamp = f"{num:0{pad}d}"  # used for both series-part and filename
        commands.append(
            f"m4b-tool merge --jobs=4 "
            f'--writer="{author}" '
            f'--sortalbum="{title}" '
            f'--albumartist="{author}" '
            f'--album="{title}" '
            f'--name="{title}" '
            f'--series="{title}" '
            f'--series-part="{stamp}" '
            f'--output-file="{title}-{stamp}.m4b" '
            f'"{dirname}" -vvv'
        )

    if not commands:
        return ""

    return " \\\n&& ".join(commands) + "\n"
|
||||
|
||||
|
||||
# ------------------------------------------------------------
# Main generator
# ------------------------------------------------------------
@logcall
def generate_all_scripts(book_base: str, title: str, author: str):
    """
    Write the helper scripts into book_base:
      - allinone.txt : say template + cleanup template + merge block
      - makebook.txt : merge block only
      - say.txt      : say template + cleanup template
    Each file is chmod'ed executable for the owner.
    """
    # --------------------------------------------------------
    # Defensive normalize
    # --------------------------------------------------------
    title = (title or "").strip()
    author = (author or "").strip()

    log(f"[SCRIPTGEN] Generating scripts in {book_base}")

    # Load templates (empty string if a template file is missing)
    say_template = load_template("say.template")
    cleanup_template = load_template("cleanup.template")

    volumes = detect_volumes(book_base)
    log(f"[SCRIPTGEN] Volumes detected: {volumes}")

    merge_block = build_merge_block(title, author, volumes)

    # --------------------------------------------------------
    # allinone.txt = say + cleanup + merge
    # --------------------------------------------------------
    outfile = os.path.join(book_base, "allinone.txt")
    with open(outfile, "w", encoding="utf-8") as f:
        f.write(say_template)
        f.write("\n")
        f.write(cleanup_template)
        f.write("\n")
        f.write(merge_block)
    # Make executable so the user can run it directly.
    os.chmod(outfile, os.stat(outfile).st_mode | stat.S_IEXEC)
    log(f"[SCRIPTGEN] Created {outfile}")

    # --------------------------------------------------------
    # makebook.txt = merge only
    # --------------------------------------------------------
    outfile2 = os.path.join(book_base, "makebook.txt")
    with open(outfile2, "w", encoding="utf-8") as f:
        f.write(merge_block)
    os.chmod(outfile2, os.stat(outfile2).st_mode | stat.S_IEXEC)
    log(f"[SCRIPTGEN] Created {outfile2}")

    # --------------------------------------------------------
    # say.txt = say + cleanup
    # --------------------------------------------------------
    outfile3 = os.path.join(book_base, "say.txt")
    with open(outfile3, "w", encoding="utf-8") as f:
        f.write(say_template)
        f.write("\n")
        f.write(cleanup_template)
    os.chmod(outfile3, os.stat(outfile3).st_mode | stat.S_IEXEC)
    log(f"[SCRIPTGEN] Created {outfile3}")

    log(f"[SCRIPTGEN] All scripts generated successfully for '{title}'")
|
||||
|
||||
|
||||
__all__ = ["generate_all_scripts"]
|
||||
@ -0,0 +1,94 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/audio_completion.py
|
||||
# Purpose:
|
||||
# Orchestration hook after audio completion.
|
||||
#
|
||||
# Rules (STRICT):
|
||||
# - ALWAYS read via get_book_state()
|
||||
# - Use ONLY merged counters from repository
|
||||
# - NO usage of derived status field
|
||||
# - Completion rule:
|
||||
# audio_completed < chapters_total → NOT DONE
|
||||
# ============================================================
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.repository import (
|
||||
get_book_state,
|
||||
try_trigger_statuscheck,
|
||||
)
|
||||
|
||||
from scraper.services.status_check_service import StatusCheckService
|
||||
from scraper.tasks.m4b_tasks import queue_m4b_for_book
|
||||
|
||||
|
||||
@logcall
def trigger_audio_completion_check(book_idx: str):
    """
    Post-audio hook: queue m4b generation once every chapter has audio.

    Invoked after inc_audio_done() / inc_audio_skipped().  Decisions are
    based ONLY on the merged counters from the repository (never the
    derived status field), then confirmed against the filesystem before
    m4b is queued exactly once (idempotency guard).
    """
    try:
        # Canonical merged state — the single source of truth for counters.
        snapshot = get_book_state(book_idx)

        total = int(snapshot.get("chapters_total", 0))
        done = int(snapshot.get("audio_done", 0))
        skipped = int(snapshot.get("audio_skipped", 0))
        completed = done + skipped

        log(
            f"[AUDIO-COMPLETION] book={book_idx} "
            f"audio_completed={completed} chapters_total={total}"
        )

        # Fast reject on counters alone: nothing to do until every chapter
        # is either converted or deliberately skipped.
        if total <= 0 or completed < total:
            log(f"[AUDIO-COMPLETION] not yet complete for book={book_idx}")
            return

        # The filesystem is authoritative — re-validate what is on disk.
        check = StatusCheckService.run(book_idx)
        fs_info = check.get("filesystem", {})
        files_on_disk = fs_info.get("audio_files", 0)
        txt_count = fs_info.get("chapters_txt", 0)

        if files_on_disk + skipped < txt_count:
            log(
                f"[AUDIO-COMPLETION] FS validation failed "
                f"(audio_files={files_on_disk}, skipped={skipped}, txt={txt_count})"
            )
            return

        # Idempotency guard: only the first caller past this point proceeds.
        if not try_trigger_statuscheck(book_idx):
            log(f"[AUDIO-COMPLETION] statuscheck already triggered for {book_idx}")
            return

        log(f"[AUDIO-COMPLETION] DONE → queue m4b for book={book_idx}")
        queue_m4b_for_book(book_idx)

    except Exception as exc:
        # Deliberately broad: this hook must never crash an audio worker.
        log(f"[AUDIO-COMPLETION][ERROR] book={book_idx} error={exc}")
|
||||
@ -0,0 +1,45 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/cover_service.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import requests
|
||||
from logbus.publisher import log
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class CoverService:
    """Downloads and stores book cover images for the UI."""

    @staticmethod
    def download_main_cover(cover_url: str, book_id: str) -> Optional[str]:
        """
        Fetch *cover_url* and store it as static/covers/<book_id>.jpg.

        Returns the local file path, or None when no URL was given or the
        download/write fails (failures are logged, never raised).
        """
        if not cover_url:
            log(f"[COVER] No cover URL for book={book_id}")
            return None

        covers_dir = os.path.join("static", "covers")
        os.makedirs(covers_dir, exist_ok=True)
        target = os.path.join(covers_dir, f"{book_id}.jpg")

        try:
            log(f"[COVER] Downloading: {cover_url}")

            response = requests.get(
                cover_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"}
            )
            response.raise_for_status()

            with open(target, "wb") as fh:
                fh.write(response.content)

        except Exception as err:
            log(f"[COVER] FAILED ({cover_url}) → {err}")
            return None

        log(f"[COVER] Stored: {target}")
        return target
|
||||
@ -0,0 +1,95 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/init_service.py
|
||||
# Purpose:
|
||||
# Orchestrate INIT-flow:
|
||||
# - resolve site
|
||||
# - fetch minimal metadata
|
||||
# - derive book_idx
|
||||
# - register in SQLite
|
||||
# - store main cover
|
||||
# ============================================================
|
||||
|
||||
import re
|
||||
from scraper.services.site_resolver import SiteResolver
|
||||
from scraper.services.scrape_engine import ScrapeEngine
|
||||
from scraper.services.cover_service import CoverService
|
||||
|
||||
from db.repository import register_book
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
|
||||
class InitService:
    """Orchestrates the INIT flow: resolve site, fetch metadata, register book."""

    # ------------------------------------------------------------
    # BOOK IDX DERIVATION
    # ------------------------------------------------------------
    @staticmethod
    @logcall
    def derive_book_id(url: str) -> str:
        """
        Derive the canonical book_idx from a book URL.

        PTWXZ URLs end with /{id}.html → that numeric id is used.
        Anything else falls back to a sanitized form of the URL so the
        result is still deterministic.
        """
        if (match := re.search(r"/(\d+)\.html$", url)) is not None:
            return match.group(1)

        # Deterministic fallback for unknown URL formats.
        return url.replace("/", "_").replace(":", "_")

    # ------------------------------------------------------------
    # MAIN INIT FLOW
    # ------------------------------------------------------------
    @staticmethod
    @logcall
    def execute(url: str) -> dict:
        """
        INIT entry point.

        Resolves the site, fetches minimal metadata, stores the main
        cover, and registers the book in SQLite.  Returns the registered
        metadata for the UI / API.
        """
        # Resolve the site handler and derive the unified book id.
        site = SiteResolver.resolve(url)
        book_idx = InitService.derive_book_id(url)

        # Legacy site objects may read .book_id — kept for compatibility only.
        site.book_id = book_idx

        # Minimal metadata scrape (title/author/description/cover).
        meta = ScrapeEngine.fetch_metadata_only(site, url)

        title = meta.get("title") or "Unknown"
        author = meta.get("author")
        description = meta.get("description")
        cover_url = meta.get("cover_url")

        # Store the main cover locally for the UI.
        cover_path = CoverService.download_main_cover(cover_url, book_idx)

        # Register in SQLite — book_idx is the sole primary ID.
        register_book(
            book_idx=book_idx,
            title=title,
            author=author,
            description=description,
            cover_url=cover_url,
            cover_path=cover_path,
            book_url=url,
        )

        # Metadata echoed back for the UI / API.
        return {
            "book_idx": book_idx,
            "title": title,
            "author": author,
            "description": description,
            "cover_url": cover_url,
            "cover_path": cover_path,
            "status": "registered",
        }
|
||||
@ -0,0 +1,20 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/site_resolver.py
|
||||
# Purpose:
|
||||
# Determine which BookSite implementation applies for a given URL.
|
||||
# This keeps INIT-flow and SCRAPE-flow site-agnostic.
|
||||
# ============================================================
|
||||
|
||||
from scraper.sites import BookSite # current PTWXZ implementation
|
||||
|
||||
|
||||
class SiteResolver:
    """
    Maps a book URL to the matching BookSite implementation.
    Currently only PTWXZ/Piaotian is supported.
    """

    @staticmethod
    def resolve(url: str):
        # The url is intentionally ignored for now: every request gets the
        # PTWXZ implementation until more domain rules are added.
        return BookSite()
|
||||
@ -0,0 +1,135 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/status_check_service.py
|
||||
# Purpose:
|
||||
# Handmatige, idempotente statuscheck per boek.
|
||||
#
|
||||
# Bepaalt op basis van het filesystem:
|
||||
# - aantal gedownloade chapters (.txt)
|
||||
# - aantal gegenereerde audiofiles (.m4b)
|
||||
#
|
||||
# En schrijft deze gevalideerde werkelijkheid naar SQL.
|
||||
#
|
||||
# LET OP:
|
||||
# - Geen Redis
|
||||
# - Geen Celery
|
||||
# - Geen status-transities
|
||||
# - Geen pipeline-logica
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.state_sql import sql_fetch_book, sql_update_book
|
||||
|
||||
|
||||
class StatusCheckService:
    """
    Manual, idempotent status check per book.

    The filesystem is the single source of truth: counts downloaded
    chapters (.txt) and generated audio files (.m4b) on disk and writes
    that validated reality back to SQL.

    Explicitly out of scope: Redis, Celery, status transitions, pipeline
    logic.
    """

    @staticmethod
    @logcall
    def run(book_idx: str) -> Dict[str, Any]:
        """
        Run the status check for one book.

        Returns an inspectable dict containing the filesystem counts and
        the SQL snapshot before and after the update.

        Raises:
            ValueError: if the book is unknown in SQL.
        """
        # ----------------------------------------------------
        # 1. SQL fetch (does the book exist?)
        # ----------------------------------------------------
        sql_before = sql_fetch_book(book_idx)
        if not sql_before:
            raise ValueError(f"[STATUSCHECK] Book not found in SQL: {book_idx}")

        # ----------------------------------------------------
        # 2. Resolve the filesystem root
        # ----------------------------------------------------
        output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
        title = sql_before.get("title")
        # NOTE(review): a missing/None title would crash os.path.join —
        # confirm 'title' is always set at registration time.
        book_dir = os.path.join(output_root, title)

        chapters_txt = 0
        audio_files = 0
        volumes = 0

        if not os.path.isdir(book_dir):
            log(
                f"[STATUSCHECK] No output directory for book_idx={book_idx} : title='{title}'"
            )
        else:
            # ------------------------------------------------
            # 3. Scan volumes
            # ------------------------------------------------
            for entry in os.listdir(book_dir):
                if not entry.lower().startswith("volume_"):
                    continue

                volume_path = os.path.join(book_dir, entry)

                # FIX: only count real directories as volumes — previously
                # plain files named 'volume_*' inflated the count while
                # contributing nothing to the scan.
                if not os.path.isdir(volume_path):
                    continue
                volumes += 1

                # ---- TXT chapters ----
                for fname in os.listdir(volume_path):
                    if fname.lower().endswith(".txt"):
                        chapters_txt += 1

                # ---- Audio ----
                audio_dir = os.path.join(volume_path, "Audio")
                if os.path.isdir(audio_dir):
                    for fname in os.listdir(audio_dir):
                        if fname.lower().endswith(".m4b"):
                            audio_files += 1

        # ----------------------------------------------------
        # 4. SQL update (snapshot)
        # ----------------------------------------------------
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # tzinfo is stripped so the stored string format stays identical.
        from datetime import timezone

        now = (
            datetime.now(timezone.utc)
            .replace(tzinfo=None)
            .isoformat(timespec="seconds")
        )

        sql_update_book(
            book_idx,
            {
                "downloaded": chapters_txt,
                "audio_done": audio_files,
                "last_update": now,
            },
        )

        sql_after = sql_fetch_book(book_idx)

        # ----------------------------------------------------
        # 5. Result for inspect/debug
        # ----------------------------------------------------
        result = {
            "book_idx": book_idx,
            "filesystem": {
                "book_dir": book_dir,
                "exists": os.path.isdir(book_dir),
                "volumes": volumes,
                "chapters_txt": chapters_txt,
                "audio_files": audio_files,
            },
            "sql_before": sql_before,
            "sql_after": sql_after,
            "notes": [],
        }

        log(
            f"[STATUSCHECK] book_idx={book_idx} "
            f"chapters={chapters_txt} audio={audio_files}"
        )

        return result
|
||||
@ -0,0 +1,28 @@
|
||||
# ============================================================
|
||||
# File: scraper/sites/__init__.py
|
||||
# Purpose:
|
||||
# Site autodetection based on URL.
|
||||
# ============================================================
|
||||
|
||||
from scraper.sites.piaotian import PiaotianScraper
|
||||
|
||||
|
||||
def get_scraper_for_url(url: str):
    """
    Return the correct scraper instance for a given URL.

    Raises ValueError when no implementation matches.
    Later: add more site implementations.
    """
    if any(token in url for token in ("ptwxz", "piaotian")):
        return PiaotianScraper()

    raise ValueError(f"No scraper available for URL: {url}")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Backwards-compatibility export for legacy BookScraper
|
||||
# ============================================================
|
||||
# Old code expects:
|
||||
# from scraper.sites import BookSite
|
||||
# We map that to our new PiaotianScraper implementation.
|
||||
|
||||
BookSite = PiaotianScraper
|
||||
@ -0,0 +1,52 @@
|
||||
# ============================================================
|
||||
# File: scraper/sites/base.py
|
||||
# Purpose:
|
||||
# Abstract interface that every site-specific scraper must implement.
|
||||
# ============================================================
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SiteScraper(ABC):
    """
    Defines the interface for site-specific scrapers.
    Each concrete scraper (Piaotian, Biquge, etc.) must implement these.
    """

    # Presumably the site's base URL that relative links resolve against
    # — confirm against concrete implementations.
    @property
    @abstractmethod
    def root(self) -> str: ...

    # Character encoding used when decoding fetched pages (e.g. GBK for
    # many Chinese sites) — confirm against implementations.
    @property
    @abstractmethod
    def encoding(self) -> str: ...

    # CSS/soup selector that locates the chapter list on the index page.
    @property
    @abstractmethod
    def chapter_list_selector(self) -> str: ...

    # --------------------------
    # Metadata extraction
    # --------------------------
    # Extract the book title from a parsed page.
    @abstractmethod
    def parse_title(self, soup: BeautifulSoup) -> str: ...

    # Extract the author name from a parsed page.
    @abstractmethod
    def parse_author(self, soup: BeautifulSoup) -> str: ...

    # Extract the book description/blurb from a parsed page.
    @abstractmethod
    def parse_description(self, soup: BeautifulSoup) -> str: ...

    # Extract the cover image URL; None when the page has no cover.
    # The original page *url* is provided, presumably to resolve relative
    # image links — confirm in implementations.
    @abstractmethod
    def parse_cover(self, soup: BeautifulSoup, url: str) -> Optional[str]: ...

    # --------------------------
    # Chapter extraction
    # --------------------------
    # From the book's landing page, find the URL of the chapter-index page.
    @abstractmethod
    def extract_chapter_page_url(self, soup: BeautifulSoup) -> str: ...

    # Parse the chapter-index page into a list of chapters (element type
    # is implementation-defined — confirm against callers).
    @abstractmethod
    def parse_chapter_list(self, soup: BeautifulSoup) -> list: ...
|
||||
@ -0,0 +1,7 @@
|
||||
# scraper/state.py
# Module-level Redis handle for shared scraper state.
import os
import redis

# Redis URL for scraper state; DB index 2 by default (DB 0/1 are used as
# Celery broker/backend elsewhere — see controller/audio tasks).
REDIS_STATE_URL = os.getenv("REDIS_STATE", "redis://redis:6379/2")

# Shared client; decode_responses=True so values come back as str, not bytes.
state = redis.Redis.from_url(REDIS_STATE_URL, decode_responses=True)
|
||||
@ -1,10 +0,0 @@
|
||||
# tasks/audio.py
|
||||
from celery import shared_task
|
||||
from logbus.publisher import log
|
||||
|
||||
|
||||
@shared_task(bind=True, queue="audio")
def text_to_audio(self, text_file):
    """
    Stub audio task on the 'audio' queue.

    Logs the requested text file and returns True; the actual macOS
    'say' conversion is not implemented here.
    """
    log(f"[AUDIO] converting: {text_file}")
    # placeholder for macOS "say"
    return True
|
||||
@ -0,0 +1,220 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/audio_tasks.py
|
||||
# Purpose: Convert chapter text files into audio using macOS
|
||||
# “say”, with Redis-based slot control.
|
||||
# Updated: now uses db.repository for audio counters.
|
||||
# ============================================================
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import socket
|
||||
import os
|
||||
|
||||
from scraper.abort import abort_requested
|
||||
from scraper.logger_decorators import logcall
|
||||
from redis import Redis
|
||||
from urllib.parse import urlparse
|
||||
from scraper.services.audio_completion import trigger_audio_completion_check
|
||||
|
||||
# NEW — unified repository façade
|
||||
from db.repository import (
|
||||
inc_audio_done,
|
||||
inc_audio_skipped,
|
||||
)
|
||||
|
||||
HOST = socket.gethostname()

# ------------------------------------------------------------
# REDIS CLIENT SETUP
# ------------------------------------------------------------
# Prefer the local override (worker running outside Docker), then the
# in-cluster URL.  FIX: without either env var, urlparse(None) used to
# raise at import time — fall back to the local default used by the
# macOS worker (Redis backend on DB 1).
redis_url = (
    os.getenv("REDIS_BACKEND_LOCAL")
    or os.getenv("REDIS_BACKEND")
    or "redis://127.0.0.1:6379/1"
)
parsed = urlparse(redis_url)

# Slot locking Redis client.
# FIX: db passed as int (redis-py expects an integer database index;
# controller_tasks already does int(...) — made consistent here), with
# DB 0 as the default when the URL carries no path.
redis_client = Redis(
    host=parsed.hostname,
    port=parsed.port,
    db=int(parsed.path.strip("/") or 0),
)

# Abort + global progress flags always live in DB 0
backend_client = Redis(
    host=parsed.hostname,
    port=parsed.port,
    db=0,
)

# ------------------------------------------------------------
# CONFIG
# ------------------------------------------------------------
AUDIO_TIMEOUT = int(os.getenv("AUDIO_TIMEOUT_SECONDS", "300"))  # per-chapter TTS limit (s)
AUDIO_VOICE = os.getenv("AUDIO_VOICE", "SinJi")                 # macOS 'say' voice name
AUDIO_RATE = int(os.getenv("AUDIO_RATE", "200"))                # 'say' speech rate

HOST_PATH = os.getenv("HOST_PATH", "/app/output")                       # output root as seen by this worker
CONTAINER_PREFIX = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "/app/output")   # output root inside the container

AUDIO_SLOTS = int(os.getenv("AUDIO_SLOTS", "1"))  # max concurrent 'say' processes
|
||||
|
||||
|
||||
# ============================================================
# CELERY TASK
# ============================================================
@celery_app.task(bind=True, queue="audio", ignore_result=True)
@logcall
def generate_audio(
    self, book_id, volume_name, chapter_number, chapter_title, chapter_path
):
    """
    Convert one chapter text file into audio with macOS 'say'.

    chapter_path: absolute container path to the chapter text file.

    Concurrency is limited via Redis slot keys.  Every terminal outcome
    increments audio_done or audio_skipped; non-abort outcomes also call
    trigger_audio_completion_check so the book can complete even when
    its last chapter fails (the completion hook's documented contract is
    to run after every inc_audio_done/inc_audio_skipped).
    """

    log(f"[AUDIO]({HOST}) CH{chapter_number}: START → {chapter_title}")

    # ------------------------------------------------------------
    # ABORT CHECK
    # ------------------------------------------------------------
    if abort_requested(book_id, backend_client):
        inc_audio_skipped(book_id)
        log(f"[AUDIO]({HOST}) ABORT detected → skip CH{chapter_number}")
        return

    # ------------------------------------------------------------
    # ACQUIRE SLOT (NX+EX so a crashed worker's slot self-expires)
    # ------------------------------------------------------------
    slot_key = None
    ttl = AUDIO_TIMEOUT + 15

    for i in range(1, AUDIO_SLOTS + 1):
        key = f"audio_slot:{i}"
        if redis_client.set(key, "1", nx=True, ex=ttl):
            slot_key = key
            log(f"[AUDIO] CH{chapter_number}: Acquired slot {i}/{AUDIO_SLOTS}")
            break

    # All slots busy → poll until one frees up, aborts, or times out.
    if slot_key is None:
        log(f"[AUDIO] CH{chapter_number}: All slots busy → waiting…")
        start_wait = time.time()

        while slot_key is None:
            for i in range(1, AUDIO_SLOTS + 1):
                key = f"audio_slot:{i}"
                if redis_client.set(key, "1", nx=True, ex=ttl):
                    slot_key = key
                    log(f"[AUDIO] CH{chapter_number}: Slot acquired after wait")
                    break

            if not slot_key:
                if abort_requested(book_id, backend_client):
                    log(f"[AUDIO] ABORT while waiting → skip CH{chapter_number}")
                    inc_audio_skipped(book_id)
                    return

                if time.time() - start_wait > ttl:
                    log(f"[AUDIO] CH{chapter_number}: Wait timeout → abort audio")
                    inc_audio_skipped(book_id)
                    # FIX: notify completion checker on skip.
                    trigger_audio_completion_check(book_id)
                    return

                time.sleep(0.25)

    # ------------------------------------------------------------
    # PATH NORMALISATION
    # ------------------------------------------------------------
    container_path = chapter_path

    if not container_path:
        log(f"[AUDIO] CH{chapter_number}: ERROR — no input file path provided")
        redis_client.delete(slot_key)
        inc_audio_skipped(book_id)
        trigger_audio_completion_check(book_id)
        return

    # Strip the container prefix so the host path is resolvable.
    if CONTAINER_PREFIX and container_path.startswith(CONTAINER_PREFIX):
        relative_path = container_path[len(CONTAINER_PREFIX) :].lstrip("/")
    else:
        relative_path = container_path

    # Expected layout: <book>/<volume>/<chapter file>
    parts = relative_path.split("/")
    if len(parts) < 3:
        log(
            f"[AUDIO] CH{chapter_number}: ERROR — cannot parse book/volume from {relative_path}"
        )
        redis_client.delete(slot_key)
        inc_audio_skipped(book_id)
        trigger_audio_completion_check(book_id)
        return

    host_path = os.path.join(HOST_PATH, relative_path)

    # ------------------------------------------------------------
    # OUTPUT DIRECTORY
    # ------------------------------------------------------------
    base_dir = os.path.join(HOST_PATH, parts[0], parts[1], "Audio")
    os.makedirs(base_dir, exist_ok=True)

    audio_file = os.path.join(base_dir, f"{chapter_number:04d}.m4b")

    # Idempotency: skip chapters that already have audio.
    if os.path.exists(audio_file):
        log(f"[AUDIO] CH{chapter_number}: Already exists → skip")
        redis_client.delete(slot_key)
        inc_audio_skipped(book_id)
        trigger_audio_completion_check(book_id)
        return

    # ------------------------------------------------------------
    # BUILD TTS COMMAND
    # FIX: argument list with shell=False — the previous shell string
    # with naive single-quoting broke on paths containing quotes and
    # was injection-prone.
    # ------------------------------------------------------------
    cmd = [
        "say",
        f"--voice={AUDIO_VOICE}",
        f"--input-file={host_path}",
        f"--output-file={audio_file}",
        "--file-format=m4bf",
        "--quality=127",
        "-r",
        str(AUDIO_RATE),
        "--data-format=aac",
    ]

    log(f"[AUDIO]({HOST}) CH{chapter_number} → output: {audio_file}")

    # ------------------------------------------------------------
    # EXECUTE
    # ------------------------------------------------------------
    try:
        subprocess.run(cmd, check=True, timeout=AUDIO_TIMEOUT)

        inc_audio_done(book_id)
        trigger_audio_completion_check(book_id)
        log(f"[AUDIO]({HOST}) CH{chapter_number}: Completed")

    except subprocess.TimeoutExpired:
        log(f"[AUDIO]({HOST}) CH{chapter_number}: TIMEOUT → removing file")
        if os.path.exists(audio_file):
            try:
                os.remove(audio_file)
            except Exception:
                pass  # best-effort cleanup of the partial output file
        inc_audio_skipped(book_id)
        # FIX: failure paths also notify the completion checker.
        trigger_audio_completion_check(book_id)

    except subprocess.CalledProcessError as e:
        log(f"[AUDIO] CH{chapter_number}: ERROR during say → {e}")
        inc_audio_skipped(book_id)
        trigger_audio_completion_check(book_id)

    except Exception as e:
        log(f"[AUDIO] CH{chapter_number}: UNEXPECTED ERROR → {e}")
        inc_audio_skipped(book_id)
        trigger_audio_completion_check(book_id)

    finally:
        if slot_key:
            redis_client.delete(slot_key)
            log(f"[AUDIO] CH{chapter_number}: Released slot")
|
||||
@ -1,21 +1,167 @@
|
||||
# scraper/tasks/controller_tasks.py
|
||||
# ============================================================
|
||||
# File: scraper/tasks/controller_tasks.py
|
||||
# Purpose:
|
||||
# FULL scrape entrypoint + launching download/parse/save pipelines.
|
||||
# NO result.get() anywhere. Scraping is done inline.
|
||||
# ============================================================
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
|
||||
import os
|
||||
import time
|
||||
import redis
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
from scraper.abort import abort_requested
|
||||
|
||||
from scraper.services.scrape_engine import ScrapeEngine
|
||||
from scraper.services.site_resolver import SiteResolver
|
||||
|
||||
from db.repository import fetch_book, set_chapters_total
|
||||
from scraper.download_controller import DownloadController
|
||||
|
||||
|
||||
print(">>> [IMPORT] controller_tasks.py loaded")
|
||||
|
||||
|
||||
# =============================================================
# 1) PUBLIC ENTRYPOINT — CALLED FROM /start
# =============================================================
@celery_app.task(
    bind=True,
    queue="controller",
    ignore_result=False,
    name="scraper.tasks.controller_tasks.start_full_scrape",
)
@logcall
def start_full_scrape(self, book_idx: str):
    """
    FULL SCRAPE ENTRYPOINT.

    Loads the book from SQLite, scrapes metadata + chapter list inline
    (no Celery .get() anywhere), then dispatches the download pipelines
    via launch_downloads().

    Raises:
        ValueError: unknown book_idx, or no book_url stored for it.
    """

    log(f"[CTRL] start_full_scrape(book_idx={book_idx})")

    # Honour an abort flag before doing any work.
    if abort_requested(book_idx):
        log(f"[CTRL] PRE-ABORT flag detected for {book_idx}")
        return {"book_idx": book_idx, "aborted": True, "reason": "pre-abort"}

    # --------------------------------------------------------
    # 1) Load book metadata from SQLite
    # --------------------------------------------------------
    book = fetch_book(book_idx)
    if not book:
        msg = f"[CTRL] Book '{book_idx}' not found in DB"
        log(msg)
        raise ValueError(msg)

    url = book.get("book_url")
    if not url:
        msg = f"[CTRL] No book_url stored for {book_idx}"
        log(msg)
        raise ValueError(msg)

    # --------------------------------------------------------
    # 2) INLINE SCRAPE (fast, no Celery wait)
    # --------------------------------------------------------
    site = SiteResolver.resolve(url)

    try:
        scrape_result = ScrapeEngine.fetch_metadata_and_chapters(site, url)
        log(f"[CTRL] Scrape OK for {book_idx}: {scrape_result.get('title')}")
    except Exception as e:
        log(f"[CTRL] ERROR during scrape of {book_idx}: {e}")
        raise

    # --------------------------------------------------------
    # 3) Continue → dispatch pipelines
    # --------------------------------------------------------
    return launch_downloads(book_idx, scrape_result)
|
||||
|
||||
|
||||
# =============================================================
# 2) PIPELINE DISPATCH (NOT a Celery task)
# =============================================================
@logcall
def launch_downloads(book_idx: str, scrape_result: dict):
    """
    Launch the entire processing pipeline:
      - initialize Redis UI state
      - initialize SQLite totals
      - dispatch per-chapter pipelines via DownloadController

    Returns a summary dict (or an abort marker when the abort flag is set).
    """

    title = scrape_result.get("title", "UnknownBook")
    chapters = scrape_result.get("chapters", []) or []
    total = len(chapters)

    # ------------------------------------------------------------
    # INIT REDIS STATE
    # ------------------------------------------------------------
    broker_url = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
    parsed = urlparse(broker_url)

    r = redis.Redis(
        host=parsed.hostname,
        port=parsed.port,
        db=int(parsed.path.strip("/")),
        decode_responses=True,
    )

    base = f"book:{book_idx}:state"

    # Single round trip instead of ten individual HSET calls.
    r.hset(
        base,
        mapping={
            "title": title,
            "status": "starting",
            "chapters_total": total,
            "chapters_download_done": 0,
            "chapters_download_skipped": 0,
            "chapters_parsed_done": 0,
            "audio_done": 0,
            "audio_skipped": 0,
            "last_update": int(time.time()),
        },
    )

    # ------------------------------------------------------------
    # INIT SQLITE SNAPSHOT
    # ------------------------------------------------------------
    try:
        set_chapters_total(book_idx, total)
    except Exception as e:
        log(f"[CTRL] ERROR updating SQLite totals: {e}")
        raise

    log(f"[CTRL] Initialized totals for {book_idx}: {total}")

    # ------------------------------------------------------------
    # ABORT CHECK BEFORE LAUNCHING JOBS
    # ------------------------------------------------------------
    if abort_requested(book_idx):
        log(f"[CTRL] ABORT flag detected — stopping BEFORE dispatch for {book_idx}")
        r.hset(base, "status", "aborted")
        return {"book_idx": book_idx, "aborted": True, "reason": "abort-before-start"}

    # ------------------------------------------------------------
    # BUILD + DISPATCH PER-CHAPTER PIPELINES
    # FIX: a second, conflicting DownloadController(scrape_result)
    # construction and a dead early return left over from an older
    # version were removed — the controller is built and started
    # exactly once.
    # ------------------------------------------------------------
    controller = DownloadController(book_idx, scrape_result)

    try:
        group_result = controller.start()
        gid = getattr(group_result, "id", None)
        log(f"[CTRL] Pipelines dispatched for {book_idx} (group_id={gid})")
    except Exception as e:
        log(f"[CTRL] ERROR dispatching pipelines for {book_idx}: {e}")
        raise

    # Update UI state to "downloading"
    r.hset(base, "status", "downloading")
    r.hset(base, "last_update", int(time.time()))

    return {
        "book_idx": book_idx,
        "total": total,
        "started": True,
        "group_id": gid,
    }
|
||||
|
||||
@ -0,0 +1,132 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/m4b_tasks.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from typing import List
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.repository import fetch_book, store_m4b_error
|
||||
from scraper.scriptgen import build_merge_block
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Helper: detect volumes (UNCHANGED)
|
||||
# ------------------------------------------------------------
|
||||
def detect_volumes(book_base: str) -> List[str]:
    """
    Return the sorted names of all 'volume_*' subdirectories of *book_base*.

    The prefix match is case-insensitive; plain files are ignored.
    """
    found = [
        entry
        for entry in os.listdir(book_base)
        if entry.lower().startswith("volume_")
        and os.path.isdir(os.path.join(book_base, entry))
    ]
    return sorted(found)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Celery task
|
||||
# ------------------------------------------------------------
|
||||
@celery_app.task(bind=True, queue="m4b", ignore_result=True)
@logcall
def run_m4btool(self, book_idx: str):
    """Run the m4b merge command for every volume of a book.

    Flow: fetch book metadata from SQL → locate the book's output
    directory → detect Volume_* subdirectories → build one shell command
    per volume via scriptgen.build_merge_block → run them sequentially.
    Per-volume failures are logged and persisted with store_m4b_error and
    processing continues with the next volume.
    """
    log(f"[M4B] START book_idx={book_idx}")

    book = fetch_book(book_idx)
    if not book:
        # Without SQL metadata there is no title → no output path to work in.
        log(f"[M4B] Book not found in SQL: book_idx={book_idx}")
        return

    title = book.get("title", book_idx)
    author = book.get("author", "Unknown")

    # The book's files live under <BOOKSCRAPER_OUTPUT_DIR>/<title>.
    output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    book_base = os.path.join(output_root, title)

    log(f"[M4B] Book base directory: {book_base}")

    if not os.path.isdir(book_base):
        log(f"[M4B] Book directory missing: {book_base}")
        return

    volumes = detect_volumes(book_base)
    if not volumes:
        log(f"[M4B] No volumes found for book_idx={book_idx}")
        return

    log(f"[M4B] Volumes detected: {volumes}")

    # build_merge_block returns a single "cmd1 && cmd2 && ..." string;
    # split it back into individual commands.
    merge_block = build_merge_block(
        title, author, [(i + 1, v) for i, v in enumerate(volumes)]
    )
    commands = [c.strip() for c in merge_block.split("&&") if c.strip()]

    # NOTE(review): zip() assumes build_merge_block emits exactly one
    # command per volume, in the same order as `volumes` — confirm in
    # scriptgen, otherwise commands silently pair with the wrong volume.
    for volume, cmd in zip(volumes, commands):
        audio_dir = os.path.join(book_base, volume, "Audio")
        if not os.path.isdir(audio_dir):
            log(f"[M4B] SKIP {volume}: no Audio directory")
            continue

        log(f"[M4B] Running for volume={volume}")
        log(f"[M4B] CMD: {cmd}")

        try:
            # shell=True is required because cmd is a composed shell string
            # from scriptgen; check=True turns non-zero exit into an
            # exception handled below.
            result = subprocess.run(
                cmd,
                cwd=book_base,
                shell=True,
                capture_output=True,
                text=True,
                check=True,
            )

            if result.stdout:
                log(f"[M4B][STDOUT] {result.stdout}")

        except subprocess.CalledProcessError as exc:
            # Tool ran but exited non-zero: log its output and record the
            # failure, then keep going with the remaining volumes.
            log(f"[M4B][FAILED] volume={volume}")

            if exc.stdout:
                log(f"[M4B][STDOUT] {exc.stdout}")
            if exc.stderr:
                log(f"[M4B][STDERR] {exc.stderr}")

            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=exc.stderr or str(exc),
            )
            continue

        except Exception as exc:
            # Anything else (OSError, encoding issues, ...) — record and
            # continue rather than aborting the whole book.
            log(f"[M4B][UNEXPECTED ERROR] volume={volume}: {exc}")

            store_m4b_error(
                book_idx=book_idx,
                volume=volume,
                error_text=str(exc),
            )
            continue

    log(f"[M4B] FINISHED book_idx={book_idx}")
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Orchestration helper (UNCHANGED)
|
||||
# ------------------------------------------------------------
|
||||
@logcall
def queue_m4b_for_book(book_idx: str):
    """Fire-and-forget: enqueue the m4b merge task for one book."""
    log(f"[M4B] Queuing m4b-tool for book_idx={book_idx}")
    task_name = "scraper.tasks.m4b_tasks.run_m4btool"
    celery_app.send_task(task_name, args=[book_idx], queue="m4b")
|
||||
@ -1,21 +1,50 @@
|
||||
# scraper/tasks/pipeline.py
|
||||
# =========================================================
|
||||
# File: scraper/tasks/pipeline.py
|
||||
# Purpose:
|
||||
# Build Celery chains for chapter processing using payload dict.
|
||||
#
|
||||
# Pipeline v3:
|
||||
# download_chapter(payload)
|
||||
# → parse_chapter(payload)
|
||||
# → save_chapter(payload)
|
||||
#
|
||||
# NOTE:
|
||||
# - book_idx is the single authoritative key for all tasks
|
||||
# - payload travels unchanged through the entire pipeline
|
||||
# =========================================================
|
||||
|
||||
from celery import chain
|
||||
|
||||
from scraper.tasks.download_tasks import download_chapter
|
||||
from scraper.tasks.parse_tasks import parse_chapter
|
||||
from scraper.tasks.save_tasks import save_chapter
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
|
||||
@logcall
def build_chapter_pipeline(book_idx: str, chapter_dict: dict, book_meta: dict):
    """
    Build a download → parse → save Celery chain for one chapter.

    BUG FIX: the original span interleaved two conflicting definitions
    (the legacy positional signature taking chapter_number/chapter_url/
    base_path and the new payload-based one), leaving invalid code. This
    is the reconstructed payload-based version, consistent with the
    chapter_dict-based task signatures.

    Args:
        book_idx: Unified book identifier — the single authoritative key.
        chapter_dict: Chapter descriptor (num, title, url, ...).
        book_meta: Book-level metadata (title, author, description).

    Returns:
        A celery chain: download_chapter → parse_chapter → save_chapter.
        The payload dict travels unchanged through the entire pipeline.
    """
    payload = {
        "book_idx": book_idx,
        "chapter": chapter_dict,
        "book_meta": book_meta,
        # Will be filled by download_chapter
        "html": None,
        # Will be filled by parse_chapter
        "parsed": None,
        # Set by download or parse on skip/404/etc
        "skipped": False,
        # Final path written by save_chapter
        "path": None,
    }

    return chain(
        download_chapter.s(payload),
        parse_chapter.s(),
        save_chapter.s(),
    )
|
||||
|
||||
@ -1,37 +1,84 @@
|
||||
# scraper/tasks/save_tasks.py
|
||||
# ============================================================
|
||||
# File: scraper/tasks/save_tasks.py (RESTORED AUDIO LOGIC + book_idx)
|
||||
# ============================================================
|
||||
|
||||
print(">>> [IMPORT] save_tasks.py loaded")
|
||||
|
||||
from celery import shared_task
|
||||
from logbus.publisher import log
|
||||
import os
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
from scraper.utils.utils import get_save_path
|
||||
from scraper.tasks.download_tasks import log_msg
|
||||
from scraper.tasks.audio_tasks import generate_audio
|
||||
|
||||
from db.repository import inc_download_done, inc_download_skipped
|
||||
|
||||
|
||||
@shared_task(bind=True, queue="save", ignore_result=False)
@logcall
def save_chapter(self, payload: dict):
    """Persist a parsed chapter to disk and queue audio generation.

    BUG FIX: the original span was a mangled diff mixing the legacy
    (parsed, base_path) implementation with the new payload-based one;
    this is the reconstructed payload version. Payload extraction now
    happens before the try block so the error handler can never hit an
    UnboundLocalError on book_idx/num.

    Behavior (restored from the old pipeline):
      - skipped / unparsed chapters are counted as skipped but still
        queue audio if a previously saved file exists
      - normal chapters are written to <volume_path>/<nnnnn>.txt and
        always queue audio

    Returns the payload (with "path"/"skipped" updated) so the chain can
    continue.
    """
    if not payload:
        log("[SAVE] ERROR: payload is None")
        return {"error": True}

    # NEW unified ID
    book_idx = payload["book_idx"]

    chapter = payload["chapter"]
    parsed = payload.get("parsed")
    path = payload.get("path")
    skipped = payload.get("skipped")

    num = chapter["num"]
    title = chapter.get("title") or f"Chapter {num}"
    volume = chapter.get("volume_path")
    volume_name = os.path.basename(volume.rstrip("/"))

    try:
        # ========================================================
        # SKIPPED CASE (old behavior restored)
        # ========================================================
        if skipped or not parsed:
            log_msg(book_idx, f"[SAVE] SKIP chapter {num}")
            inc_download_skipped(book_idx)

            # OLD behavior: even skipped chapters still queue audio
            if path and os.path.exists(path):
                log_msg(book_idx, f"[AUDIO] Queueing audio for SKIPPED chapter {num}")
                try:
                    generate_audio.delay(book_idx, volume_name, num, title, path)
                except Exception as exc:
                    log_msg(book_idx, f"[AUDIO] ERROR queueing skipped audio: {exc}")

            return payload

        # ========================================================
        # NORMAL SAVE CASE
        # ========================================================
        os.makedirs(volume, exist_ok=True)
        save_path = get_save_path(num, volume)

        with open(save_path, "w", encoding="utf-8") as f:
            f.write(parsed)

        log_msg(book_idx, f"[SAVE] Saved chapter {num} → {save_path}")
        inc_download_done(book_idx)

        # OLD behavior: ALWAYS queue audio (queueing failure must not
        # fail the save itself)
        try:
            generate_audio.delay(book_idx, volume_name, num, title, save_path)
            log_msg(book_idx, f"[AUDIO] Task queued for chapter {num}")
        except Exception as exc:
            log_msg(book_idx, f"[AUDIO] ERROR queueing chapter {num}: {exc}")

        payload["path"] = save_path
        payload["skipped"] = False
        return payload

    except Exception as exc:
        log_msg(book_idx, f"[SAVE] ERROR saving chapter {num}: {exc}")
        raise
|
||||
|
||||
@ -1,52 +1,101 @@
|
||||
# scraper/tasks/scraping.py
|
||||
#
|
||||
# ============================================================
|
||||
# File: scraper/tasks/scraping.py
|
||||
# Purpose:
|
||||
# Scrape ONLY metadata + chapter list.
|
||||
# Does NOT launch download controller anymore.
|
||||
# Controller decides when pipelines start.
|
||||
# ============================================================
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
import os
|
||||
import redis
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
from scraper.sites import BookSite
|
||||
from scraper.book_scraper import BookScraper
|
||||
from scraper.tasks.controller_tasks import launch_downloads
|
||||
from scraper.abort import clear_abort
|
||||
from scraper.ui_log import reset_ui_logs
|
||||
|
||||
from scraper.services.init_service import InitService
|
||||
|
||||
print(">>> [IMPORT] scraping.py loaded")
|
||||
|
||||
# Redis connection (same DB as Celery broker)
|
||||
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
||||
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
|
||||
@celery_app.task(
    bind=True,
    queue="scraping",
    ignore_result=False,
    name="scraper.tasks.scraping.start_scrape_book",
)
@logcall
def start_scrape_book(self, url: str):
    """
    Scrape metadata + chapter list for one book URL.

    DOES NOT start the download / pipeline controller — the
    controller_tasks.start_full_scrape() task calls this one and decides
    when pipelines start.

    BUG FIX: the original span interleaved the legacy implementation
    (get_chapter_list + hand-built result dict + direct launch_downloads
    send_task) with the new scraper.execute()-based one, producing
    invalid code. This is the reconstructed new version.

    Returns the scrape result dict ({title, author, chapters, cover_url,
    book_idx, ...}).
    """
    # Clear the global UI log buffer so the GUI shows a fresh run.
    reset_ui_logs()
    log(f"[SCRAPING] Start scraping for: {url}")

    # ------------------------------------------------------------
    # SCRAPE (old engine)
    # ------------------------------------------------------------
    site = BookSite()
    scraper = BookScraper(site, url)
    scraper.parse_book_info()
    result = scraper.execute()  # → { title, author, chapters, cover_url, ... }

    chapters = result.get("chapters", [])
    full_count = len(chapters)

    # ------------------------------------------------------------
    # Compute unified book_idx
    # ------------------------------------------------------------
    book_idx = InitService.derive_book_id(url)
    result["book_idx"] = book_idx

    log(f"[SCRAPING] Assigned book_idx = {book_idx}")

    # ------------------------------------------------------------
    # DRY RUN TEST LIMIT
    # ------------------------------------------------------------
    DRY_RUN = os.getenv("DRY_RUN", "0") == "1"
    TEST_LIMIT = int(os.getenv("TEST_LIMIT", "5"))

    if DRY_RUN:
        log(f"[SCRAPING] DRY RUN active → limiting chapters to {TEST_LIMIT}")
        result["chapters"] = chapters[:TEST_LIMIT]

    log(
        f"[SCRAPING] Completed scrape: "
        f"{len(result['chapters'])}/{full_count} chapters"
    )

    # ------------------------------------------------------------
    # RESET ABORT + INITIALIZE LEGACY PROGRESS
    # ------------------------------------------------------------
    clear_abort(book_idx)

    r.set(f"progress:{book_idx}:total", len(result["chapters"]))
    r.set(f"progress:{book_idx}:done", 0)

    r.delete(f"logs:{book_idx}")
    r.rpush(f"logs:{book_idx}", f":: SCRAPING STARTED for {url}")
    r.rpush(f"logs:{book_idx}", f":: Found {len(result['chapters'])} chapters")

    # IMPORTANT: do NOT dispatch any pipelines here.
    # The controller receives this scrape result and continues.
    return result
|
||||
|
||||
@ -0,0 +1,149 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/statuscheck.py
|
||||
# Purpose:
|
||||
# Final status check after audio completion.
|
||||
#
|
||||
# Responsibilities:
|
||||
# - Verify Redis counters (sanity check)
|
||||
# - Verify filesystem (Audio files present)
|
||||
# - Queue m4btool task
|
||||
#
|
||||
# Design rules:
|
||||
# - Book-scope ONLY
|
||||
# - No direct Redis usage
|
||||
# - Repository is the single source of truth
|
||||
# - Idempotent, defensive, non-blocking
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.repository import (
|
||||
get_audio_done,
|
||||
get_chapters_total,
|
||||
set_status,
|
||||
fetch_book,
|
||||
)
|
||||
|
||||
from scraper.tasks.m4b_tasks import run_m4btool
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Helpers
|
||||
# ------------------------------------------------------------
|
||||
@logcall
def _detect_volumes(book_base: str):
    """
    Return the sorted list of Volume_XXX directory names under book_base.

    BUG FIX: this helper was decorated with @log, but `log` (imported
    from logbus.publisher) is the log-publish *function*, not a
    decorator — applying it at import time replaced _detect_volumes with
    log's return value, making the helper uncallable. The sibling
    helper _count_audio_files uses @logcall; use it here too.
    """
    vols = []
    for name in os.listdir(book_base):
        # Case-insensitive prefix match; only keep actual directories.
        if name.lower().startswith("volume_"):
            full = os.path.join(book_base, name)
            if os.path.isdir(full):
                vols.append(name)
    vols.sort()
    return vols
|
||||
|
||||
|
||||
@logcall
def _count_audio_files(audio_dir: str) -> int:
    """Count .m4b files directly inside an Audio directory (0 if absent)."""
    if not os.path.isdir(audio_dir):
        return 0
    return sum(1 for entry in os.listdir(audio_dir) if entry.lower().endswith(".m4b"))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Celery task
|
||||
# ------------------------------------------------------------
|
||||
@celery_app.task(bind=True, queue="controller", ignore_result=True)
@logcall
def run_statuscheck(self, book_idx: str):
    """
    Final statuscheck before m4btool execution.

    Triggered exactly once by the audio_completion quickcheck. Verifies
    repository counters, checks the filesystem lightly, then queues the
    m4b merge task. Idempotent and non-blocking: every failed check just
    logs and returns.
    """
    log(f"[STATUSCHECK] START book={book_idx}")

    # --------------------------------------------------------
    # 1. Redis sanity check (via repository)
    # --------------------------------------------------------
    audio_done = get_audio_done(book_idx)
    chapters_total = get_chapters_total(book_idx)

    log(
        f"[STATUSCHECK] Counters book={book_idx} "
        f"audio_done={audio_done} chapters_total={chapters_total}"
    )

    if chapters_total <= 0:
        log("[STATUSCHECK] No chapters_total → abort")
        return

    if audio_done < chapters_total:
        # Defensive: should not happen, but never assume
        log(
            f"[STATUSCHECK] Audio not complete yet "
            f"({audio_done}/{chapters_total}) → abort"
        )
        return

    # --------------------------------------------------------
    # 2. Fetch book metadata (title drives the output path)
    # --------------------------------------------------------
    book = fetch_book(book_idx)
    if not book:
        log(f"[STATUSCHECK] Book not found in DB: {book_idx}")
        return

    title = book.get("title") or book_idx

    # Base output directory
    root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
    book_base = os.path.join(root, title)

    if not os.path.isdir(book_base):
        log(f"[STATUSCHECK] Book directory missing: {book_base}")
        return

    # --------------------------------------------------------
    # 3. Filesystem validation (light, non-blocking)
    # --------------------------------------------------------
    volumes = _detect_volumes(book_base)

    if not volumes:
        log(f"[STATUSCHECK] No volumes found for {book_idx}")
        # Still allow m4btool to decide (it will no-op)
    else:
        for vol in volumes:
            audio_dir = os.path.join(book_base, vol, "Audio")
            count = _count_audio_files(audio_dir)
            log(f"[STATUSCHECK] {vol}: {count} audio files detected")

    # --------------------------------------------------------
    # 4. Queue m4btool (final pipeline step)
    # --------------------------------------------------------
    log(f"[STATUSCHECK] Queue m4btool for book={book_idx}")

    set_status(book_idx, "m4b_running")

    # BUG FIX: run_m4btool's signature is (self, book_idx) and it derives
    # book_base/title/author itself from SQL. The original call passed
    # unsupported book_base= and meta= kwargs, which would raise
    # TypeError when the worker executed the task.
    run_m4btool.delay(book_idx)

    log(f"[STATUSCHECK] DONE book={book_idx}")
|
||||
@ -1,57 +0,0 @@
|
||||
# scraper/utils.py
|
||||
|
||||
import re
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Load replacements from text_replacements.txt (optional file)
|
||||
# ------------------------------------------------------------
|
||||
def load_replacements(filepath="text_replacements.txt") -> dict:
    """
    Read key=value style replacement pairs from *filepath*.

    Lines without "=" are ignored; keys and values are stripped.
    A missing or empty file yields {}.
    """
    source = Path(filepath)
    if not source.exists():
        return {}

    mapping = {}
    with source.open("r", encoding="utf-8") as handle:
        for raw in handle:
            stripped = raw.strip()
            if "=" not in stripped:
                continue
            lhs, rhs = stripped.split("=", 1)
            mapping[lhs.strip()] = rhs.strip()
    return mapping
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Clean extracted HTML text
|
||||
# ------------------------------------------------------------
|
||||
def clean_text(raw: str, repl_dict: dict = None) -> str:
    """
    Normalize whitespace and apply optional replacement pairs.

    Steps: drop CR characters, collapse runs of 3+ newlines into a
    single blank line, apply replacements, strip surrounding whitespace.
    repl_dict defaults to {}.
    """
    replacements = repl_dict or {}

    # CRLF → LF, then squeeze excess blank lines.
    cleaned = raw.replace("\r", "")
    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)

    for needle, substitute in replacements.items():
        cleaned = cleaned.replace(needle, substitute)

    return cleaned.strip()
|
||||
@ -0,0 +1,44 @@
|
||||
#!/bin/bash
# Convert each chapter .txt in every subfolder into an .m4b via macOS `say`,
# writing into <subfolder>/Audio. Existing outputs are skipped.
#
# BUG FIXES vs original:
#   - shebang was /bin/sh but the script uses bashisms (shopt, [[ ]]),
#     which fail under a POSIX sh
#   - "$outputfile" was unquoted in the -f test and echo (broke on
#     titles containing spaces)

main_dir="$( cd "$( dirname "$0" )" && pwd )"

shopt -s nocasematch  # For case-insensitive regex matching

for subfolder in "$main_dir"/*; do

    if [ -d "$subfolder" ]; then
        audiofolder="$subfolder/Audio"
        mkdir -p "$audiofolder"

        for entry in "$subfolder"/*.txt; do
            fn=$(basename "$entry")
            [[ "${entry##*.}" =~ txt ]]

            echo "$fn"
            inputfile="$subfolder/$fn"
            outputfile="$audiofolder/${fn%.*}.m4b"

            now=$(date +"%T")
            echo "Current time : $now"
            echo "$inputfile ->"
            echo "$outputfile"

            if [ -f "$outputfile" ]; then
                echo "$outputfile exists: skipping"
            else
                say --voice=Sinji \
                    --output-file="$outputfile" \
                    --input-file="$inputfile" \
                    --file-format=m4bf \
                    --quality=127 \
                    -r 200 \
                    --data-format=aac
            fi

        done

    fi

done

# CLEANUP WILL BE APPENDED BY scriptgen.py
|
||||
@ -0,0 +1,4 @@
|
||||
# Delete truncated .m4b files (< 580 bytes) left behind by failed `say` runs.
# BUG FIXES vs original:
#   - \"$fname\" inside $(...) passed literal quote characters to ls, so
#     the listing failed for every file; plain "$fname" is correct.
#   - `read` without -r mangles backslashes; IFS= preserves leading blanks.
find . -name "*.m4b" -size -580c | while IFS= read -r fname; do
    echo "deleting $(ls -lah "$fname")"
    rm "$fname"
done
|
||||
@ -0,0 +1,38 @@
|
||||
#!/bin/bash
# Convert each chapter .txt in every subfolder into an .m4b via macOS `say`,
# writing into <subfolder>/Audio. Existing outputs are skipped.
#
# BUG FIX: shebang was /bin/sh, but `shopt` and `[[ ]]` are bash-only and
# fail under a POSIX sh.

main_dir="$( cd "$( dirname "$0" )" && pwd )"

shopt -s nocasematch

for subfolder in "$main_dir"/*; do
    if [ -d "$subfolder" ]; then
        audiofolder="$subfolder/Audio"
        mkdir -p "$audiofolder"

        for entry in "$subfolder"/*.txt; do
            fn=$(basename "$entry")
            [[ "${entry##*.}" =~ txt ]]

            echo "$fn"
            inputfile="$subfolder/$fn"
            outputfile="$audiofolder/${fn%.*}.m4b"

            now=$(date +"%T")
            echo "Current time : $now"
            echo "$inputfile ->"
            echo "$outputfile"

            if [ -f "$outputfile" ]; then
                echo "$outputfile exists: skipping"
            else
                say --voice=Sinji \
                    --output-file="$outputfile" \
                    --input-file="$inputfile" \
                    --file-format=m4bf \
                    --quality=127 \
                    -r 200 \
                    --data-format=aac
            fi
        done
    fi
done
|
||||
@ -0,0 +1,74 @@
|
||||
# ============================================
|
||||
# File: scraper/ui_log.py
|
||||
# Purpose: Central UI log buffer for WebGUI
|
||||
# Single global buffer. No book_id.
|
||||
# ============================================
|
||||
|
||||
import redis
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
||||
LOG_BUFFER_SIZE = int(os.getenv("LOG_BUFFER_SIZE", "1000"))
|
||||
|
||||
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
UI_LOG_KEY = "logs:ui"
|
||||
|
||||
|
||||
def push_ui(message: str):
    """Append a timestamped message to the global UI log (no book_id).

    Blank/whitespace-only messages are dropped; the buffer is trimmed to
    the newest LOG_BUFFER_SIZE entries.
    """
    if not message or not message.strip():
        return

    stamp = datetime.now().strftime("%H:%M:%S")
    r.rpush(UI_LOG_KEY, f"[{stamp}] {message}")
    r.ltrim(UI_LOG_KEY, -LOG_BUFFER_SIZE, -1)
|
||||
|
||||
|
||||
def get_ui_logs(limit: int = None):
    """Return the last *limit* global UI log lines (full buffer by default)."""
    count = LOG_BUFFER_SIZE if limit is None else limit
    return r.lrange(UI_LOG_KEY, -count, -1)
|
||||
|
||||
|
||||
def reset_ui_logs():
    """
    Clear the entire UI log buffer.

    Used by:
      - Clear button in GUI
      - Auto-clear when new book scraping starts
    """
    # DEL removes the whole Redis list; the next push recreates it lazily.
    r.delete(UI_LOG_KEY)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Delta-based log retrieval using Redis indexes
|
||||
# ============================================================
|
||||
|
||||
|
||||
def get_ui_logs_delta(last_index: int):
    """
    Return (new_lines, total_count) for delta-based UI log polling.

    Only log lines AFTER last_index are returned, so the client can poll
    cheaply and append just the new tail.

    Example:
        last_index = 10 → returns logs with Redis indexes 11..end
    """
    total = r.llen(UI_LOG_KEY)

    if total == 0:
        return [], 0

    # First load (last_index < 0) OR index out of range — the latter
    # means the buffer was reset/trimmed since the client last polled,
    # so resend the entire buffer.
    if last_index < 0 or last_index >= total:
        logs = r.lrange(UI_LOG_KEY, 0, -1)
        return logs, total

    # Only the lines appended after the client's last seen index.
    new_lines = r.lrange(UI_LOG_KEY, last_index + 1, -1)
    return new_lines, total
|
||||
@ -1,36 +0,0 @@
|
||||
# scraper/utils.py
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_replacements(path="text_replacements.txt") -> dict:
    """
    Parse key=value replacement pairs from a simple text file.

    Blank lines and lines beginning with # are skipped; a missing file
    yields {}.
    """
    fp = Path(path)
    if not fp.exists():
        return {}

    pairs = {}
    for raw in fp.read_text(encoding="utf-8").splitlines():
        entry = raw.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, value = entry.split("=", 1)
        pairs[key.strip()] = value.strip()

    return pairs
|
||||
|
||||
|
||||
def clean_text(raw: str, repl_dict: dict) -> str:
    """Apply user-defined replacements to *raw* and trim surrounding whitespace."""
    result = raw
    for needle, substitute in repl_dict.items():
        result = result.replace(needle, substitute)
    return result.strip()
|
||||
@ -0,0 +1,272 @@
|
||||
# ============================================================
|
||||
# File: scraper/utils/state_sync.py
|
||||
# Purpose:
|
||||
# State inspection + optional sync logic for unified book_idx model.
|
||||
# Generates full book-card compatible dicts for debug UI.
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import redis
|
||||
from db.db import get_db
|
||||
|
||||
|
||||
def _build_card(sqlite_row, redis_state, merged):
|
||||
"""
|
||||
Creates a dict that matches the fields required by components/bookcard.html:
|
||||
b.book_idx
|
||||
b.title
|
||||
b.author
|
||||
b.cover_path
|
||||
b.status
|
||||
b.created_at
|
||||
b.download_done
|
||||
b.download_total
|
||||
b.audio_done
|
||||
b.audio_total
|
||||
"""
|
||||
|
||||
return {
|
||||
"book_idx": sqlite_row.get("book_idx"),
|
||||
"title": sqlite_row.get("title") or "Unknown",
|
||||
"author": sqlite_row.get("author"),
|
||||
"cover_path": sqlite_row.get("cover_path"),
|
||||
# Use merged status (Redis > SQLite)
|
||||
"status": merged.get("status") or sqlite_row.get("status") or "unknown",
|
||||
# Meta
|
||||
"created_at": sqlite_row.get("created_at"),
|
||||
# Download counters
|
||||
"download_done": merged.get("downloaded", 0),
|
||||
"download_total": merged.get("chapters_total", 0),
|
||||
# Audio counters
|
||||
"audio_done": merged.get("audio_done", 0),
|
||||
"audio_total": merged.get("chapters_total", 0),
|
||||
}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# INSPECT ONLY — NO WRITES
|
||||
# ============================================================
|
||||
def inspect_books_state_depecrated():
    """
    Read all books from SQLite, fetch their Redis progress hashes, and
    show what a Redis→SQLite merge WOULD produce — without writing.

    (Name typo "depecrated" is preserved: callers reference it as-is.)

    Builds per book:
      • entry.sqlite        — raw SQLite row
      • entry.redis         — raw Redis state hash
      • entry.would_merge_to — dry-run merge result (Redis wins)
      • entry.card          — book-card compatible dict
    """
    # NOTE(review): os.getenv("REDIS_BROKER") may be None if the env var
    # is unset, which makes from_url raise — confirm deployment always
    # sets it.
    r = redis.Redis.from_url(os.getenv("REDIS_BROKER"), decode_responses=True)
    db = get_db()
    cur = db.cursor()

    cur.execute("SELECT * FROM books")
    rows = cur.fetchall()

    results = []

    for row in rows:
        sqlite_row = dict(row)
        book_idx = sqlite_row["book_idx"]

        # Per-book Redis state hash; {} when the book has no live state.
        redis_key = f"book:{book_idx}:state"
        redis_state = r.hgetall(redis_key) or {}

        # ================================
        # DRY-RUN MERGE LOGIC (no writes)
        # ================================
        merged = sqlite_row.copy()

        if redis_state:
            # Redis counters override SQLite values when present;
            # otherwise the SQLite value (or 0) is kept.
            merged["downloaded"] = int(
                redis_state.get("chapters_download_done", merged.get("downloaded", 0))
            )

            merged["parsed"] = int(
                redis_state.get("chapters_parsed_done", merged.get("parsed", 0))
            )

            merged["audio_done"] = int(
                redis_state.get("audio_done", merged.get("audio_done", 0))
            )

            merged["chapters_total"] = int(
                redis_state.get("chapters_total", merged.get("chapters_total", 0))
            )

            merged["status"] = redis_state.get(
                "status", merged.get("status", "unknown")
            )

        # ================================
        # Build book-card data
        # ================================
        card = _build_card(sqlite_row, redis_state, merged)

        # ================================
        # Append final result entry
        # ================================
        results.append(
            {
                "book_idx": book_idx,
                "title": sqlite_row.get("title"),
                "sqlite": sqlite_row,
                "redis": redis_state,
                "would_merge_to": merged,
                "card": card,
            }
        )

    return results
|
||||
|
||||
|
||||
# ============================================================
|
||||
# INSPECT ONLY — NO WRITES
|
||||
# ============================================================
|
||||
def inspect_books_state():
    """
    Read canonical book state from the repository (single source of
    truth) and present it in the same shape as the deprecated inspector.

    Builds per book:
      • entry.sqlite        — SQLite-column subset of the canonical state
      • entry.redis         — Redis counters/status subset
      • entry.would_merge_to — the canonical state itself
      • entry.card          — book-card compatible dict
    """
    # Local imports avoid a module-level import cycle with db.repository.
    from db.repository import get_book_state
    from db.db import get_db

    db = get_db()
    cur = db.cursor()

    # Only needed to know *which* books exist; all values come from
    # get_book_state below.
    cur.execute("SELECT book_idx FROM books")
    rows = cur.fetchall()

    results = []

    for row in rows:
        book_idx = row["book_idx"]

        # --------------------------------
        # Canonical state (single source of truth)
        # --------------------------------
        state = get_book_state(book_idx)

        # SQLite view = only the SQLite-backed columns
        sqlite_view = {
            k: v
            for k, v in state.items()
            if k
            in (
                "book_idx",
                "title",
                "author",
                "description",
                "cover_path",
                "book_url",
                "chapters_total",
                "status",
                "downloaded",
                "parsed",
                "audio_done",
                "created_at",
                "processdate",
                "last_update",
            )
        }

        # Redis view = only the Redis counters/status
        redis_view = {
            k: v
            for k, v in state.items()
            if k.startswith("chapters_")
            or k in ("status", "audio_done", "audio_skipped")
        }

        merged = state  # literally the canonical state — nothing to merge

        card = _build_card(sqlite_view, redis_view, merged)

        results.append(
            {
                "book_idx": book_idx,
                "title": state.get("title"),
                "sqlite": sqlite_view,
                "redis": redis_view,
                "would_merge_to": merged,
                "card": card,
            }
        )

    return results
|
||||
|
||||
|
||||
# ============================================================
|
||||
# SYNC REDIS → SQLITE (writes)
|
||||
# ============================================================
|
||||
def sync_books_from_redis():
    """
    Push per-book progress counters from Redis back into SQLite.

    For every row in the ``books`` table the matching Redis hash
    (``book:<book_idx>:state``) is read. When the hash exists, its
    counters are written into the SQLite row and the row is re-read so
    callers can see the before/after diff. Books without Redis state
    are reported unchanged.

    Returns:
        list[dict]: one entry per book with keys ``book_idx``,
        ``before`` (SQLite row prior to sync), ``redis`` (raw Redis
        hash, possibly empty) and ``after`` (SQLite row after sync).
    """
    client = redis.Redis.from_url(os.getenv("REDIS_BROKER"), decode_responses=True)
    conn = get_db()
    cursor = conn.cursor()

    cursor.execute("SELECT * FROM books")

    report = []

    for record in cursor.fetchall():
        before = dict(record)
        book_idx = before["book_idx"]

        state = client.hgetall(f"book:{book_idx}:state")

        # No Redis state for this book → nothing to write back.
        if not state:
            report.append(
                {
                    "book_idx": book_idx,
                    "before": before,
                    "redis": {},
                    "after": before,
                }
            )
            continue

        # Progress counters as stored by the workers (Redis returns strings).
        params = (
            int(state.get("chapters_download_done", 0)),
            int(state.get("chapters_parsed_done", 0)),
            int(state.get("audio_done", 0)),
            int(state.get("chapters_total", 0)),
            state.get("status", before.get("status")),
            book_idx,
        )

        cursor.execute(
            """
            UPDATE books
            SET downloaded = ?, parsed = ?, audio_done = ?, chapters_total = ?, status = ?, last_update = datetime('now')
            WHERE book_idx = ?
            """,
            params,
        )
        conn.commit()

        # Re-read the row so the caller sees exactly what SQLite now holds.
        cursor.execute("SELECT * FROM books WHERE book_idx = ?", (book_idx,))
        report.append(
            {
                "book_idx": book_idx,
                "before": before,
                "redis": state,
                "after": dict(cursor.fetchone()),
            }
        )

    return report
|
||||
@ -0,0 +1,114 @@
|
||||
# ============================================================
|
||||
# File: scraper/utils.py
|
||||
# Purpose:
|
||||
# Centralised replacement loader + text cleaner
|
||||
# using 3 replacement categories:
|
||||
# 1) HTML replacements
|
||||
# 2) Encoding replacements
|
||||
# 3) Junk-term replacements (generic "noise" phrases)
|
||||
#
|
||||
# Nothing in this file contains hardcoded cleanup rules.
|
||||
# EVERYTHING comes from replacement files ONLY.
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Generic key=value replacement loader
|
||||
# ------------------------------------------------------------
|
||||
def load_replacement_file(path: Path) -> dict:
    """
    Parse a ``key=value`` replacement file into a dict.

    A missing file yields an empty dict. Blank lines and lines that
    start with ``#`` are skipped; lines without ``=`` are ignored.
    Everything after the first ``=`` belongs to the value; keys and
    values are whitespace-stripped.
    """
    if not path.exists():
        return {}

    mapping: dict = {}

    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            entry = raw.strip()

            # Skip blanks and comment lines.
            if not entry or entry.startswith("#"):
                continue

            # Only well-formed key=value lines contribute a mapping.
            if "=" not in entry:
                continue

            left, _, right = entry.partition("=")
            mapping[left.strip()] = right.strip()

    return mapping
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Load all categories (HTML → encoding → junk)
|
||||
# Order matters: later overrides earlier.
|
||||
# ------------------------------------------------------------
|
||||
def load_all_replacements() -> dict:
    """
    Merge every replacement category into one dict.

    Load order is HTML → encoding → junk; a key defined in a later
    file overrides the same key from an earlier one. Files live in the
    ``replacements`` directory next to this module.
    """
    base = Path(__file__).parent / "replacements"

    merged: dict = {}
    # Order matters: later files win on duplicate keys.
    for filename in ("html.txt", "encoding.txt", "junk.txt"):
        merged.update(load_replacement_file(base / filename))

    return merged
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Legacy compatibility wrapper
|
||||
# Many modules still import: from scraper.utils import load_replacements
|
||||
# This wrapper keeps everything working.
|
||||
# ------------------------------------------------------------
|
||||
def load_replacements(filepath=None) -> dict:
    """
    Backward-compatible entry point for legacy imports.

    Older modules still do ``from scraper.utils import load_replacements``.
    With no argument the merged replacement tables are returned; with a
    filepath only that single file is loaded.
    """
    if filepath is not None:
        # Explicit single-file load.
        return load_replacement_file(Path(filepath))
    return load_all_replacements()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Clean text using loaded replacements
|
||||
# ------------------------------------------------------------
|
||||
def clean_text(raw: str, repl: dict) -> str:
    """
    Normalise *raw* text using the supplied replacement table.

    Carriage returns are dropped, every ``key → value`` pair in *repl*
    is applied in dict order, and runs of three or more newlines are
    collapsed to a single blank line. The result is stripped of outer
    whitespace. All cleanup rules come from *repl*; nothing is
    hardcoded here.
    """
    if not raw:
        return ""

    text = raw.replace("\r", "")

    # Sequential substitution — later pairs see the output of earlier ones.
    for needle, replacement in repl.items():
        text = text.replace(needle, replacement)

    # Collapse excessive vertical whitespace to at most one blank line.
    return re.sub(r"\n{3,}", "\n\n", text).strip()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Determine chapter save path
|
||||
# ------------------------------------------------------------
|
||||
def get_save_path(chapter_num: int, base_path: str) -> str:
    """Return the chapter file path: ``<base_path>/NNNN.txt`` (zero-padded to 4)."""
    return os.path.join(base_path, f"{chapter_num:04d}.txt")
|
||||
@ -0,0 +1,66 @@
|
||||
#!/bin/bash
# Launch the local macOS audio worker (must run OUTSIDE Docker so the
# native 'say' command works). Expects to be run from the project root.
set -e

echo ""
echo "====================================================="
echo " STARTING LOCAL macOS AUDIO WORKER"
echo "====================================================="
echo ""

# ------------------------------------------------------
# Load .env so REDIS_BROKER_LOCAL becomes available.
# allexport makes every variable sourced from .env exported.
# ------------------------------------------------------
if [ -f ".env" ]; then
    set -o allexport
    source .env
    set +o allexport
fi

# ------------------------------------------------------
# Override Redis to the local instance for macOS —
# inside Docker the broker uses an internal hostname.
# ------------------------------------------------------
export REDIS_BROKER="$REDIS_BROKER_LOCAL"
export REDIS_BACKEND="$REDIS_BACKEND_LOCAL"

echo "[AUDIO] Redis override:"
echo "  REDIS_BROKER=$REDIS_BROKER"
echo "  REDIS_BACKEND=$REDIS_BACKEND"
echo ""

# ------------------------------------------------------
# Create venv if needed
# ------------------------------------------------------
if [ ! -d ".venv" ]; then
    echo "[AUDIO] No .venv found — creating virtualenv..."
    python3 -m venv .venv
else
    echo "[AUDIO] Existing .venv found"
fi

# Activate virtualenv
echo "[AUDIO] Activating .venv"
source .venv/bin/activate

# ------------------------------------------------------
# Install requirements (audio-only requirement set)
# ------------------------------------------------------
REQ="requirements.audio.txt"

if [ ! -f "$REQ" ]; then
    echo "[AUDIO] ERROR — $REQ not found!"
    exit 1
fi

echo "[AUDIO] Installing audio requirements..."
pip install -r "$REQ"

# Celery must be installed locally too (worker runs outside Docker).
echo "[AUDIO] Ensuring Celery installed..."
pip install celery

# ------------------------------------------------------
# Start the worker (blocks until the worker exits)
# ------------------------------------------------------
echo ""
echo "[AUDIO] Starting audio worker..."
python3 audio_worker_local.py
|
||||
@ -0,0 +1,310 @@
|
||||
/* =======================================================================
   File: static/css/bookcard.css
   Purpose:
     Styling for registered book cards:
       - status colours
       - badges
       - start/abort/statuscheck
       - progress bars
   ======================================================================= */

/* -----------------------------------------------------------------------
   GRID WRAPPER
   ----------------------------------------------------------------------- */

/* Responsive card grid: as many 340px-minimum columns as fit. */
.registered-grid {
  display: grid;
  grid-template-columns: repeat(auto-fill, minmax(340px, 1fr));
  gap: 20px;
  margin-top: 15px;
}

/* -----------------------------------------------------------------------
   BOOK CARD BASE
   ----------------------------------------------------------------------- */

/* Two-column card: fixed 90px cover on the left, metadata on the right. */
.book-card {
  position: relative;
  display: grid;
  grid-template-columns: 90px auto;
  gap: 15px;

  padding: 15px;
  background: #fff;
  border-radius: 10px;
  border: 1px solid #e5e5e5;
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);

  /* Smooth colour change when a status class (below) is swapped in by JS. */
  transition: border-color 0.25s ease, box-shadow 0.25s ease;
}

/* -----------------------------------------------------------------------
   STATUS COLORS (BOOK CARD BORDER)
   These classes are toggled by bookcard_controller.js from merged state.
   ----------------------------------------------------------------------- */

/* Downloading / actively busy */
.book-card.downloading {
  border-color: #ff9500;
  box-shadow: 0 0 6px rgba(255, 149, 0, 0.35);
}

/* Audio phase */
.book-card.audio {
  border-color: #ffca28;
  box-shadow: 0 0 6px rgba(255, 202, 40, 0.35);
}

/* Fully finished */
.book-card.done {
  border: 2px solid #4caf50;
  box-shadow: 0 0 6px rgba(76, 175, 80, 0.35);
}

/* Aborted */
.book-card.aborted {
  border-color: #ff3b30;
  box-shadow: 0 0 6px rgba(255, 59, 48, 0.35);
}

/* -----------------------------------------------------------------------
   COVER
   ----------------------------------------------------------------------- */

.book-cover {
  width: 90px;
}

.book-img {
  width: 90px;
  height: 130px;
  object-fit: cover;
  border-radius: 4px;
  background: #f4f4f4;
}

/* Fallback box when a book has no cover image. */
.placeholder {
  display: flex;
  justify-content: center;
  align-items: center;
  color: #777;
  font-size: 12px;
}

/* -----------------------------------------------------------------------
   META
   ----------------------------------------------------------------------- */

.book-meta {
  display: flex;
  flex-direction: column;
  justify-content: space-between;
}

.book-title {
  font-size: 16px;
  font-weight: bold;
}

.book-author {
  font-size: 14px;
  color: #444;
  margin-bottom: 6px;
}

.book-created {
  font-size: 12px;
  color: #666;
}

/* -----------------------------------------------------------------------
   ACTION BUTTONS
   ----------------------------------------------------------------------- */

.book-actions {
  display: flex;
  justify-content: flex-end;
  gap: 10px;
  margin-top: 10px;
}

/* Shared base for the square icon buttons below. */
.icon-btn {
  width: 34px;
  height: 34px;
  border: none;
  border-radius: 8px;

  display: flex;
  justify-content: center;
  align-items: center;

  font-size: 16px;
  color: #fff;
  cursor: pointer;

  transition: background 0.15s ease, transform 0.1s ease;
}

/* Start */
.icon-start {
  background: #2d8a3d;
}
.icon-start:hover {
  background: #226c30;
  transform: scale(1.05);
}
.icon-start:disabled {
  background: #9bbb9f;
  cursor: not-allowed;
  opacity: 0.5;
}

/* Abort */
.icon-abort {
  background: #c62828;
}
.icon-abort:hover {
  background: #a31f1f;
  transform: scale(1.05);
}
.icon-abort:disabled {
  background: #d8a0a0;
  cursor: not-allowed;
  opacity: 0.5;
}

/* Hide — small form pinned to the card's top-right corner. */
.hide-form {
  position: absolute;
  top: 6px;
  right: 6px;
}
.icon-hide {
  background: #777;
}
.icon-hide:hover {
  background: #555;
}

/* Statuscheck (text button variant) */
.statuscheck-btn {
  background-color: #444;
  color: #fff;
  border: 1px solid #666;
  margin-left: 4px;
  padding: 4px 8px;
  border-radius: 6px;
  font-size: 12px;
  cursor: pointer;
}
.statuscheck-btn:hover {
  background-color: #333;
}

/* -----------------------------------------------------------------------
   PROGRESS (FULL WIDTH)
   ----------------------------------------------------------------------- */

/* Spans both card columns (cover + meta). */
.book-progress {
  grid-column: 1 / -1;
  margin-top: 12px;
  padding: 10px 12px;
  background: #f6f6f6;
  border-radius: 8px;
}

.progress-row {
  margin-bottom: 4px;
}

.progress-label {
  font-size: 12px;
  margin-bottom: 4px;
  color: #444;
}

/* BAR — outer track; the fill's width is set by JS as a percentage. */
.progressbar {
  position: relative;
  width: 100%;
  height: 14px;
  background: #ddd;
  border-radius: 7px;
  overflow: hidden;
}

.progressbar-fill {
  height: 100%;
  transition: width 0.4s ease;
}

/* Download */
.progressbar-fill.download {
  background: #2196f3;
}

/* Audio */
.progressbar-fill.audio {
  background: #4caf50;
}

/* TEXT IN BAR — centred "done / total" overlay. */
.progressbar-text {
  position: absolute;
  inset: 0;
  display: flex;
  align-items: center;
  justify-content: center;

  font-size: 11px;
  font-weight: 600;
  color: #fff;
  text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);
  pointer-events: none;
}

/* -----------------------------------------------------------------------
   STATUS BADGE
   ----------------------------------------------------------------------- */

.status-badge {
  display: inline-block;
  margin-bottom: 6px;
  padding: 2px 8px;
  font-size: 11px;
  font-weight: 600;
  border-radius: 10px;
  text-transform: uppercase;
  letter-spacing: 0.5px;
  cursor: default;
}

/* DONE */
.status-badge.status-done {
  background-color: #e6f4ea;
  color: #2e7d32;
  border: 1px solid #4caf50;
}

/* AUDIO */
.status-badge.status-audio {
  background-color: #fff8e1;
  color: #8d6e00;
  border: 1px solid #ffca28;
}

/* DOWNLOADING */
.status-badge.status-downloading {
  background-color: #e3f2fd;
  color: #1565c0;
  border: 1px solid #42a5f5;
}

/* Statuscheck (icon button variant) */
.icon-statuscheck {
  background: #444;
}

.icon-statuscheck:hover {
  background: #333;
  transform: scale(1.05);
}
|
||||
@ -0,0 +1,312 @@
|
||||
/* =======================================================================
   File: static/css/dashboard.css
   Purpose:
     Clean full-width vertical dashboard layout with large log viewer.
     Book-card CSS is now moved to bookcard.css
   ======================================================================= */

/* -----------------------------------------------------------------------
   1) GENERAL PAGE LAYOUT
   ----------------------------------------------------------------------- */

.dashboard-container {
  display: flex;
  flex-direction: column;
  width: 100%;
  max-width: 1200px;
  margin: 20px auto;
  padding: 0 20px;
  gap: 18px;
}

.dashboard-section {
  background: #ffffff;
  padding: 16px;
  border-radius: 6px;
  border: 1px solid #ddd;
}

.page-title {
  font-size: 22px;
  margin-bottom: 15px;
}

/* -----------------------------------------------------------------------
   2) ACTIVE BOOK LIST (dashboard left panel)
   ----------------------------------------------------------------------- */

.book-list {
  display: flex;
  flex-direction: column;
  gap: 12px;
}

.book-list-empty {
  padding: 18px;
  text-align: center;
  color: #777;
}

.book-list-item {
  padding: 12px 16px;
  background: #f7f7f7;
  border-radius: 6px;
  border: 1px solid #ccc;
  cursor: pointer;

  display: flex;
  flex-direction: column;
  gap: 6px;

  transition: background 0.2s, border-color 0.2s;
}

/* Selected item shares the hover look so the active book stays highlighted. */
.book-list-item:hover,
.book-list-item.active {
  background: #eaf3ff;
  border-color: #1e88e5;
}

.book-title {
  font-size: 16px;
  font-weight: 600;
}

.book-meta {
  font-size: 12px;
  color: #555;
}

/* -----------------------------------------------------------------------
   3) PROGRESS BOX
   ----------------------------------------------------------------------- */

.progress-box {
  background: #fafafa;
  border: 1px solid #ddd;
  padding: 8px;
  border-radius: 6px;
}

.progress-header h2 {
  margin-bottom: 5px;
}

.progress-subtitle {
  font-size: 14px;
  color: #333;
  font-weight: 600;
}

.progress-bookid {
  font-size: 12px;
  color: #777;
  margin-bottom: 15px;
}

.progress-bar {
  height: 14px;
  background: #ddd;
  border-radius: 6px;
  overflow: hidden;
  margin-bottom: 6px;
}

.progress-bar-fill {
  height: 100%;
  background: #1e88e5;
}

/* Audio progress uses a distinct colour from download progress. */
.progress-bar-fill.audio-fill {
  background: #e65100;
}

.progress-stats {
  display: flex;
  justify-content: space-between;
  font-size: 12px;
  color: #444;
  margin-top: 4px;
}

.book-abort-area {
  margin-top: 10px;
  text-align: right;
}

.abort-btn {
  padding: 6px 12px;
  border-radius: 4px;
  border: 1px solid #cc0000;
  background: #ff4444;
  color: white;
  font-size: 12px;
  cursor: pointer;
  transition: background 0.2s, border-color 0.2s;
}

.abort-btn:hover {
  background: #ff2222;
  border-color: #aa0000;
}

/* -----------------------------------------------------------------------
   4) LOG VIEWER
   ----------------------------------------------------------------------- */

.log-viewer {
  width: 100%;
  max-width: 100%;
  overflow: hidden;
}

.log-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
}

.log-filters {
  display: flex;
  align-items: center;
  gap: 8px;
}

/* Terminal-style output pane: green-on-black monospace, scrolls vertically. */
.log-output {
  flex: 1;
  width: 100%;
  max-width: 100%;
  min-height: 60vh;
  max-height: 75vh;

  overflow-y: auto;
  overflow-x: hidden;

  background: #000;
  color: #00ff66;
  border: 1px solid #0f0;
  border-radius: 6px;
  padding: 12px;

  font-family: "SF Mono", "Consolas", "Courier New", monospace;
  font-size: 13px;
  line-height: 1.35;

  white-space: pre-wrap;
  word-break: break-word;
}

.log-line {
  white-space: pre-wrap;
  padding: 2px 0;
}
/* Per-category log colours, applied by the log filter script. */
.log-line.default {
  color: #00ff66;
}
.log-line.dl {
  color: #00ccff;
}
.log-line.parse {
  color: #ffaa00;
}
.log-line.save {
  color: #ffdd33;
}
.log-line.audio {
  color: #ff66ff;
}
.log-line.ctrl {
  color: #66aaff;
}
.log-line.error {
  color: #ff3333;
}

/* -----------------------------------------------------------------------
   5) PLACEHOLDER / FOOTER
   ----------------------------------------------------------------------- */

.dashboard-placeholder {
  font-size: 15px;
  padding: 20px;
  text-align: center;
  color: #777;
}

.footer {
  text-align: center;
  padding: 12px;
  color: #666;
  margin-top: 25px;
  font-size: 12px;
  border-top: 1px solid #ddd;
}
/* -----------------------------
   DROPDOWN NAVIGATION
   ------------------------------ */

/* Container for dropdown */
.nav-dropdown {
  position: relative;
}

/* The clickable label ("Tools ▾") */
.nav-dropdown > .nav-item {
  cursor: pointer;
}

/* Hide dropdown by default */
.dropdown-menu {
  display: none;
  position: absolute;
  top: 100%;
  right: 0;
  background: #fff; /* same background as the navbar */
  border: 1px solid #ddd;
  padding: 8px 0;
  margin: 0;
  list-style: none; /* remove list bullets */
  border-radius: 4px;
  min-width: 160px;
  z-index: 1000;
}

/* Show dropdown when hovering over parent */
.nav-dropdown:hover .dropdown-menu {
  display: block;
}

/* Menu item styling */
.dropdown-menu li {
  padding: 0;
  margin: 0;
}

.dropdown-menu li a {
  display: block;
  padding: 8px 16px;
  white-space: nowrap;
  color: #333;
  text-decoration: none;
}

/* Hover state */
.dropdown-menu li a:hover {
  background: #f0f0f0;
}

/* Key/value tables used on the debug/status pages. */
table.kv {
  border-collapse: collapse;
  margin-bottom: 16px;
}

table.kv th {
  text-align: left;
  padding-right: 12px;
  color: #777;
  font-weight: normal;
}

table.kv td {
  font-weight: 500;
}
||||
@ -0,0 +1,160 @@
|
||||
/* =======================================================================
   File: static/css/style.css
   Purpose:
     Global base styling for all pages.
     Includes typography, buttons, forms, layout primitives.
   ======================================================================= */

/* ------------------------------
   RESET / BASE
   ------------------------------ */

html,
body {
  margin: 0;
  padding: 0;
  font-family: Arial, Helvetica, sans-serif;
  background: #f5f6fa;
  color: #222;
}

/* Centred content column shared by most pages. */
.container {
  max-width: 1100px;
  margin: 0 auto;
  padding: 20px;
}

h1,
h2,
h3 {
  margin: 0 0 15px 0;
  font-weight: 600;
}

a {
  color: #1e88e5;
  text-decoration: none;
}

a:hover {
  text-decoration: underline;
}

/* ------------------------------
   BUTTONS
   ------------------------------ */

.btn-primary {
  background: #1e88e5;
  color: #fff;
  padding: 10px 18px;
  border: none;
  border-radius: 4px;
  cursor: pointer;
  font-size: 15px;
  transition: background 0.2s ease;
}

.btn-primary:hover {
  background: #1669b9;
}

.btn-small {
  padding: 5px 10px;
  background: #ccc;
  border-radius: 4px;
  border: none;
  font-size: 13px;
}

.btn-small:hover {
  background: #bbb;
}

/* ------------------------------
   FORM ELEMENTS (book-URL submit form)
   ------------------------------ */

.url-form {
  display: flex;
  gap: 10px;
  flex-direction: column;
  max-width: 550px;
}

.url-label {
  font-weight: 600;
}

.url-input {
  padding: 10px;
  font-size: 15px;
  border: 1px solid #bbb;
  border-radius: 4px;
}

.url-submit {
  align-self: flex-start;
}

/* ------------------------------
   NAVBAR
   ------------------------------ */

.navbar {
  background: #ffffff;
  border-bottom: 1px solid #ddd;
  padding: 12px 20px;
}

/* Inner wrapper keeps brand left and links right. */
.nav-inner {
  max-width: 1200px;
  margin: 0 auto;
  display: flex;
  align-items: center;
  justify-content: space-between;
}

.nav-brand a {
  font-size: 20px;
  font-weight: bold;
  color: #1e88e5;
}

.nav-links {
  list-style: none;
  display: flex;
  gap: 25px;
  margin: 0;
  padding: 0;
}

.nav-item {
  font-size: 15px;
  color: #333;
}

.nav-item:hover {
  color: #1e88e5;
}

/* ------------------------------
   LANDING PAGE
   ------------------------------ */

.landing-container {
  max-width: 600px;
  margin: 40px auto;
  background: #fff;
  padding: 25px;
  border-radius: 6px;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.landing-title {
  margin-bottom: 20px;
}

.landing-links {
  margin-top: 20px;
}
|
||||
@ -0,0 +1,33 @@
|
||||
/* =======================================================================
   File: static/js/app.js
   Purpose:
     Global utility functions shared across all scripts.
     No page-specific logic here.
   ======================================================================= */

// Shortcuts: querySelector / querySelectorAll with optional scope element.
const $ = (sel, parent = document) => parent.querySelector(sel);
const $$ = (sel, parent = document) => parent.querySelectorAll(sel);

// Safe log — console logger with a fixed "[APP]" prefix.
function dbg(...args) {
  console.log("[APP]", ...args);
}

// AJAX helper — fetch JSON from `url` (cache disabled).
// Resolves to the parsed body, or null on any HTTP or network error
// (callers must handle null instead of catching).
async function apiGet(url) {
  try {
    const res = await fetch(url, { cache: "no-store" });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.json();
  } catch (err) {
    console.error("API GET Error:", url, err);
    return null;
  }
}

// Auto-scroll utility — jump `el` to its bottom (used by the log viewer).
// No-op when `el` is missing.
function autoScroll(el) {
  if (!el) return;
  el.scrollTop = el.scrollHeight;
}
|
||||
@ -0,0 +1,145 @@
|
||||
/* ============================================================
|
||||
File: static/js/bookcard_controller.js
|
||||
Purpose:
|
||||
Single owner for updating book-card DOM from merged state
|
||||
(would_merge_to)
|
||||
============================================================ */
|
||||
|
||||
console.log("[BOOKCARD] controller loaded");
|
||||
|
||||
/* ============================================================
|
||||
ENTRY POINT (called by state_updater.js)
|
||||
============================================================ */
|
||||
|
||||
/* Entry point (called by state_updater.js): refresh every visible
   book-card from the list of merged state entries. */
function updateBookCardsFromState(stateList) {
  console.log("[BOOKCARD] updateBookCardsFromState called");

  if (!Array.isArray(stateList)) {
    console.warn("[BOOKCARD] Invalid stateList", stateList);
    return;
  }

  // Index merged state by book_idx (stringified so it matches data-attrs).
  const stateById = {};
  for (const entry of stateList) {
    const merged = entry.would_merge_to;
    if (!merged || merged.book_idx == null) {
      console.warn("[BOOKCARD] entry without merged/book_idx", entry);
      continue;
    }
    stateById[String(merged.book_idx)] = merged;
  }

  // Update each card that has a matching state entry; leave others alone.
  for (const card of document.querySelectorAll(".book-card")) {
    const bookIdx = card.dataset.bookIdx;
    const state = stateById[bookIdx];

    if (!state) {
      console.debug("[BOOKCARD] No state for book_idx", bookIdx);
      continue;
    }

    console.log("[BOOKCARD] Updating card", bookIdx, state.status);
    updateSingleBookCard(card, state);
  }
}
|
||||
|
||||
/* ============================================================
|
||||
SINGLE CARD UPDATE
|
||||
============================================================ */
|
||||
|
||||
/* Refresh one card: status class, badge, buttons, then progress bars. */
function updateSingleBookCard(card, state) {
  console.log("[BOOKCARD] updateSingleBookCard", state.book_idx);

  // Same order as the individual updaters were originally called in.
  const updaters = [updateStatus, updateStatusBadge, updateButtons, updateProgress];
  for (const apply of updaters) {
    apply(card, state);
  }
}
|
||||
|
||||
/* ============================================================
|
||||
STATUS
|
||||
============================================================ */
|
||||
|
||||
/* Swap the card's status class (drives the border colours in bookcard.css). */
function updateStatus(card, state) {
  console.log("[BOOKCARD][STATUS]", state.book_idx, "→", state.status);
  const statusClass = state.status || "";
  card.className = `book-card ${statusClass}`;
}
|
||||
/* Sync the badge's text, CSS class and tooltip with the current status. */
function updateStatusBadge(card, state) {
  const badge = card.querySelector(".status-badge");
  if (!badge) return;

  const status = (state.status || "").toLowerCase();

  badge.textContent = status.toUpperCase();
  badge.className = `status-badge status-${status}`;

  // Dutch tooltips per status; unknown statuses get no tooltip.
  const tooltips = {
    downloading: "Bezig met downloaden",
    audio: "Downloads compleet, audio wordt gegenereerd",
    done: "Alle chapters en audio zijn compleet",
  };
  badge.title = tooltips[status] || "";
}
|
||||
|
||||
/* ============================================================
|
||||
BUTTONS
|
||||
============================================================ */
|
||||
|
||||
/* Enable/disable the action buttons based on whether the book is busy. */
function updateButtons(card, state) {
  const startBtn = card.querySelector(".icon-start");
  const abortBtn = card.querySelector(".icon-abort");

  const busyStates = ["starting", "downloading", "parsing", "audio"];
  const isBusy = busyStates.includes(state.status);

  console.log("[BOOKCARD][BUTTONS]", state.book_idx, "status:", state.status);

  if (startBtn) {
    // Intentionally left enabled — restarting is always allowed.
    // startBtn.disabled = isBusy;
  }

  // Abort only makes sense while a pipeline stage is running.
  if (abortBtn) {
    abortBtn.disabled = !isBusy;
  }
}
|
||||
|
||||
/* ============================================================
|
||||
PROGRESS (DOWNLOAD + AUDIO)
|
||||
============================================================ */
|
||||
|
||||
/* Update both progress bars (download + audio) from merged state `s`.
   Percentages are clamped to 100 and fall back to 0 when total is unknown. */
function updateProgress(card, s) {
  const total = Number(s.chapters_total || 0);

  // Download progress comes from the merged SQLite `downloaded` counter.
  const downloadDone = Number(s.downloaded || 0);

  // Audio counts both generated and deliberately skipped chapters.
  const audioDone = Number(s.audio_done || 0) + Number(s.audio_skipped || 0);

  const toPct = (done) => (total > 0 ? Math.min((done / total) * 100, 100) : 0);
  const downloadPct = toPct(downloadDone);
  const audioPct = toPct(audioDone);

  console.log("[BOOKCARD][PROGRESS]", s.book_idx, {
    total,
    downloadDone,
    audioDone,
    downloadPct,
    audioPct,
  });

  // Write width + "done / total" label into one bar; missing nodes are skipped.
  const renderBar = (barField, textField, pct, done) => {
    const bar = card.querySelector(`[data-field="${barField}"]`);
    const text = card.querySelector(`[data-field="${textField}"]`);
    if (bar) bar.style.width = `${pct}%`;
    if (text) text.textContent = `${done} / ${total}`;
  };

  /* ---- DOWNLOAD ---- */
  renderBar("download_pct", "download_text", downloadPct, downloadDone);

  /* ---- AUDIO ---- */
  renderBar("audio_pct", "audio_text", audioPct, audioDone);
}
|
||||
@ -0,0 +1,178 @@
|
||||
/* =======================================================================
|
||||
File: static/js/dashboard.js
|
||||
Purpose:
|
||||
- Sidebar selectie
|
||||
- Start / Abort acties
|
||||
- UI status updates
|
||||
NOTE:
|
||||
- GEEN polling
|
||||
- state_updater.js is leidend
|
||||
======================================================================= */
|
||||
|
||||
console.log("[DASHBOARD] loaded");
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Helpers
|
||||
--------------------------------------------------------- */
|
||||
/* GET `url` as JSON; returns the parsed body, or null on any failure
   (non-2xx response or network/parse error). Never throws. */
async function apiGet(url) {
  console.log("[DASHBOARD][API] GET", url);
  try {
    const resp = await fetch(url, { cache: "no-store" });
    if (resp.ok) {
      return await resp.json();
    }
    console.warn("[DASHBOARD][API] GET failed", url, resp.status);
    return null;
  } catch (err) {
    console.error("[DASHBOARD][API] GET error", url, err);
    return null;
  }
}
|
||||
|
||||
/* Forward a log packet to the global log viewer, if log_view.js is loaded. */
function safeUpdateLogs(data) {
  if (typeof window.updateLogs !== "function") return;
  console.log("[DASHBOARD] updateLogs()");
  window.updateLogs(data);
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
State
|
||||
--------------------------------------------------------- */
|
||||
let ACTIVE_BOOK_IDX = null;
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
DOM READY
|
||||
--------------------------------------------------------- */
|
||||
/* Boot the dashboard: wire up sidebar + card buttons, auto-select first book. */
document.addEventListener("DOMContentLoaded", () => {
  console.log("[DASHBOARD] DOMContentLoaded");

  bindSidebar();
  bindBookCardButtons();

  // Auto-select the first book in the sidebar, if any.
  const firstItem = document.querySelector(".book-list-item");
  if (!firstItem) return;
  console.log("[DASHBOARD] auto-select", firstItem.dataset.bookIdx);
  selectBook(firstItem.dataset.bookIdx);
});
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Sidebar
|
||||
--------------------------------------------------------- */
|
||||
/* Attach a click handler to every sidebar book entry. */
function bindSidebar() {
  console.log("[DASHBOARD] bindSidebar()");
  const entries = document.querySelectorAll(".book-list-item");
  entries.forEach((entry) => {
    entry.onclick = () => selectBook(entry.dataset.bookIdx);
  });
}
|
||||
|
||||
/* Mark `bookIdx` as the active book and refresh its view.
   No-op when the idx is falsy or already selected. */
function selectBook(bookIdx) {
  if (!bookIdx) return;
  if (bookIdx === ACTIVE_BOOK_IDX) return;

  ACTIVE_BOOK_IDX = bookIdx;
  console.log("[DASHBOARD] selectBook", bookIdx);

  // Highlight the selected sidebar entry, clear the rest.
  for (const el of document.querySelectorAll(".book-list-item")) {
    el.classList.toggle("active", el.dataset.bookIdx === bookIdx);
  }

  refreshBook(bookIdx);
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Book refresh (NO POLLING)
|
||||
--------------------------------------------------------- */
|
||||
/* One-shot refresh for a book: pull its logs, then repaint all cards.
   (No polling here — state_updater.js owns periodic updates.) */
async function refreshBook(bookIdx) {
  console.log("[DASHBOARD] refreshBook", bookIdx);

  const packet = await apiGet(`/api/book/${bookIdx}/logs`);
  if (packet) safeUpdateLogs(packet);

  refreshBookCards();
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Bookcard buttons
|
||||
--------------------------------------------------------- */
|
||||
/* Wire the start/abort buttons on every book card.
   Idempotent: each button is marked via data-bound so repeated calls
   (e.g. after a re-render) do not double-bind. */
function bindBookCardButtons() {
  console.log("[DASHBOARD] bindBookCardButtons()");
  _bindCardAction(".icon-start", startBook);
  _bindCardAction(".icon-abort", abortBook);
}

/* Bind `action(bookIdx)` to every unbound button matching `selector`.
   The book idx is resolved from the enclosing .book-card element. */
function _bindCardAction(selector, action) {
  document.querySelectorAll(selector).forEach((btn) => {
    if (btn.dataset.bound) return; // already wired on a previous pass
    btn.dataset.bound = "1";

    btn.onclick = (e) => {
      e.preventDefault(); // buttons live inside <form>s; stop the submit
      const card = btn.closest(".book-card");
      if (!card) return;
      action(card.dataset.bookIdx);
    };
  });
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
START
|
||||
--------------------------------------------------------- */
|
||||
/* POST /start for `bookIdx`, then refresh that book's view.
   Errors are logged instead of becoming unhandled rejections. */
function startBook(bookIdx) {
  console.log("[DASHBOARD] START", bookIdx);

  fetch("/start", {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    // encode: idx comes from a data-attribute and must not break the form body
    body: `book_idx=${encodeURIComponent(bookIdx)}`,
  })
    .then(() => refreshBook(bookIdx))
    .catch((err) => console.error("[DASHBOARD] START failed", bookIdx, err));
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
ABORT
|
||||
--------------------------------------------------------- */
|
||||
/* Ask the user for confirmation, then POST /abort/<idx> and refresh.
   Errors are logged instead of becoming unhandled rejections. */
function abortBook(bookIdx) {
  if (!confirm(`Abort book ${bookIdx}?`)) return;

  console.log("[DASHBOARD] ABORT", bookIdx);

  // encode: idx comes from a data-attribute and must not break the URL path
  fetch(`/abort/${encodeURIComponent(bookIdx)}`, { method: "POST" })
    .then(() => refreshBook(bookIdx))
    .catch((err) => console.error("[DASHBOARD] ABORT failed", bookIdx, err));
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Bookcard UI refresh (non-progress)
|
||||
--------------------------------------------------------- */
|
||||
/* Repaint non-progress card UI (status class, abort button) from /api/books. */
async function refreshBookCards() {
  console.log("[DASHBOARD] refreshBookCards()");
  const books = await apiGet("/api/books");
  if (!books) return;

  // Statuses during which abort is meaningful.
  const busy = ["processing", "downloading", "parsing", "audio"];

  document.querySelectorAll(".book-card").forEach((card) => {
    const idx = card.dataset.bookIdx;
    // BUGFIX: dataset values are always strings while the API may return
    // numeric ids, so a strict === comparison could never match — compare
    // both sides as strings instead.
    const info = books.find((b) => String(b.book_idx) === String(idx));
    if (!info) return;

    console.log("[DASHBOARD] card status", idx, info.status);
    card.className = `book-card ${info.status}`;

    const abortBtn = card.querySelector(".icon-abort");
    if (abortBtn) {
      abortBtn.disabled = !busy.includes(info.status);
    }
  });
}
|
||||
@ -0,0 +1,13 @@
|
||||
/* =======================================================================
|
||||
File: static/js/helpers.js
|
||||
Purpose:
|
||||
Shared DOM helpers for all JS files.
|
||||
======================================================================= */
|
||||
|
||||
/* Tiny DOM helpers shared by every page script. */
window.$ = function (selector) {
  return document.querySelector(selector);
};
window.$$ = function (selector) {
  return document.querySelectorAll(selector);
};

/* Pin an element's scroll position to its bottom (used by the log viewer). */
window.autoScroll = function (element) {
  if (!element) return;
  element.scrollTop = element.scrollHeight;
};
|
||||
@ -0,0 +1,101 @@
|
||||
/* ============================================================
|
||||
File: static/js/inspect_state.js
|
||||
Purpose:
|
||||
- Receive merged state via state_updater.js
|
||||
- Update ONLY the right-side state tables
|
||||
- NO polling, NO fetch
|
||||
============================================================ */
|
||||
|
||||
console.log("[inspect_state] JS loaded (subscriber mode)");
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
State subscription
|
||||
------------------------------------------------------------ */
|
||||
|
||||
/* Subscriber: state_updater.js broadcasts merged state via "state:update". */
window.addEventListener("state:update", (event) => {
  const entries = event.detail;

  // Defensive: the dispatcher should always send an array.
  if (!Array.isArray(entries)) {
    console.warn("[inspect_state] state:update payload is not array", entries);
    return;
  }

  console.log("[inspect_state] state:update received entries:", entries.length);
  updateInspectTables(entries);
});
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Update tables
|
||||
------------------------------------------------------------ */
|
||||
|
||||
/* Re-render the SQLite / Redis / merged comparison table for every entry. */
function updateInspectTables(entries) {
  console.log("[inspect_state] updating tables");

  // Fields shown in the comparison table, in display order.
  // (Replaces ten copy-pasted ${row(...)} calls with one data-driven list.)
  const FIELDS = [
    "status",
    "chapters_total",
    "downloaded",
    "chapters_download_done",
    "chapters_download_skipped",
    "parsed",
    "chapters_parsed_done",
    "audio_done",
    "audio_skipped",
    "last_update",
  ];

  entries.forEach((entry) => {
    const bookIdx = entry.book_idx;
    if (bookIdx == null) {
      console.warn("[inspect_state] entry without book_idx", entry);
      return;
    }

    const block = document.querySelector(
      `.state-block[data-book-idx="${bookIdx}"]`
    );
    if (!block) {
      console.warn("[inspect_state] no state-block for book_idx", bookIdx);
      return;
    }

    const table = block.querySelector(".state-table");
    if (!table) {
      console.warn("[inspect_state] no state-table for book_idx", bookIdx);
      return;
    }

    console.log("[inspect_state] updating table for book_idx", bookIdx);

    const sql = entry.sqlite || {};
    const redis = entry.redis || {};
    const merged = entry.would_merge_to || {};

    table.innerHTML = `
      <tr>
        <th>Field</th>
        <th>SQLite</th>
        <th>Redis</th>
        <th>Merged</th>
      </tr>
      ${FIELDS.map((f) => row(f, sql, redis, merged)).join("")}
    `;
  });
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Row helper
|
||||
------------------------------------------------------------ */
|
||||
|
||||
/* Render one comparison row for `field`.
   The SQLite and Redis cells share class "same" when their values match
   (string-compared, so 5 matches "5"), otherwise "diff". */
function row(field, sql, redis, merged) {
  const sqlVal = sql[field] ?? "";
  const redisVal = redis[field] ?? "";
  const mergedVal = merged[field] ?? "";

  const cls = String(sqlVal) === String(redisVal) ? "same" : "diff";

  return `
    <tr>
      <th>${field}</th>
      <td class="${cls}">${sqlVal}</td>
      <td class="${cls}">${redisVal}</td>
      <td>${mergedVal}</td>
    </tr>
  `;
}
|
||||
@ -0,0 +1,130 @@
|
||||
/* =======================================================================
|
||||
File: static/js/log_view.js
|
||||
Purpose:
|
||||
High-performance rolling log viewer
|
||||
- efficient delta polling
|
||||
- append-only mode (no DOM reset)
|
||||
- rolling limit (prevents memory freeze)
|
||||
- supports both global logs and per-book logs
|
||||
======================================================================= */
|
||||
|
||||
console.log(">>> log_view.js LOADING…");
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Global log viewer state
|
||||
--------------------------------------------------------- */
|
||||
let LOG_FILTER = "ALL";
|
||||
let LAST_LOG_INDEX = -1; // delta offset
|
||||
const MAX_LOG_LINES = 600;
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Apply filter on existing log lines
|
||||
--------------------------------------------------------- */
|
||||
/* Show/hide already-rendered log lines according to the global LOG_FILTER
   ("ALL" shows everything; any other value is matched as a substring). */
function applyLogFilter() {
  $$(".log-line").forEach((line) => {
    const text = line.innerText;
    const matches = LOG_FILTER === "ALL" || (text && text.includes(LOG_FILTER));
    line.style.display = matches ? "block" : "none";
  });
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
DOM Ready — bind clear/filter
|
||||
--------------------------------------------------------- */
|
||||
/* Wire the log viewer controls: clear button and filter dropdown. */
document.addEventListener("DOMContentLoaded", () => {
  console.log(">>> log_view.js DOMContentLoaded");

  const clearBtn = $("#log-clear");
  const filterSel = $("#log-filter");
  const output = $("#log-output");

  if (!output) {
    console.log(">>> log_view.js: No #log-output → viewer disabled");
    return;
  }

  if (clearBtn) {
    clearBtn.addEventListener("click", () => {
      console.log(">>> log_view.js: Clear log viewer");
      output.innerHTML = "";
      LAST_LOG_INDEX = -1; // next poll re-fetches from the start
    });
  }

  // BUGFIX: the #log-filter <select> (see log_view.html) was never bound,
  // so LOG_FILTER stayed "ALL" regardless of the dropdown.
  if (filterSel) {
    filterSel.addEventListener("change", () => {
      LOG_FILTER = filterSel.value;
      console.log(">>> log_view.js: filter =", LOG_FILTER);
      applyLogFilter();
    });
  }
});
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Append ONE line
|
||||
--------------------------------------------------------- */
|
||||
/* Append one line to #log-output, classify it by its tag, and enforce
   the rolling MAX_LOG_LINES limit so the DOM cannot grow unbounded. */
function rollingAppend(lineText) {
  const output = $("#log-output");
  if (!output) return;

  const div = document.createElement("div");
  div.classList.add("log-line");

  // First matching tag wins; unrecognized lines fall back to "default".
  const TAG_CLASSES = [
    [["[DL]", "[DOWNLOAD]"], "dl"],
    [["[PARSE]"], "parse"],
    [["[SAVE]"], "save"],
    [["[AUDIO]"], "audio"],
    [["[CTRL]"], "ctrl"],
    [["[ERROR]"], "error"],
  ];
  const match = TAG_CLASSES.find(([tags]) =>
    tags.some((tag) => lineText.includes(tag))
  );
  div.classList.add(match ? match[1] : "default");

  div.textContent = lineText;
  output.appendChild(div);

  // Drop oldest lines beyond the rolling limit.
  while (output.childNodes.length > MAX_LOG_LINES) {
    output.removeChild(output.firstChild);
  }
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Primary entry: updateLogs()
|
||||
Accepts:
|
||||
{ logs:[...], last:N }
|
||||
OR legacy:
|
||||
{ lines:[...], last:N }
|
||||
--------------------------------------------------------- */
|
||||
/* Ingest a log packet into the viewer.
   Accepts { logs:[...], last:N } or legacy { lines:[...], last:N }. */
function updateLogs(packet) {
  const output = $("#log-output");
  if (!output || !packet) return;

  const lines = packet.logs || packet.lines || [];
  if (!Array.isArray(lines)) return;

  for (const line of lines) {
    rollingAppend(line);
  }

  // Track the server-side delta offset for the next poll.
  if (packet.last !== undefined) {
    LAST_LOG_INDEX = packet.last;
  }

  applyLogFilter();
  autoScroll(output);
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Delta polling — global logs ONLY
|
||||
(dashboard.js overrides logs per-book)
|
||||
--------------------------------------------------------- */
|
||||
/* Delta-poll the global /logs endpoint for lines newer than LAST_LOG_INDEX.
   (Per-book log views override this via dashboard.js.) */
function pollLogs() {
  fetch(`/logs?last_index=${LAST_LOG_INDEX}`)
    .then((r) => r.json())
    .then((data) => {
      const fresh = data.lines || [];
      if (fresh.length === 0) return;
      fresh.forEach((line) => rollingAppend(line));
      LAST_LOG_INDEX = data.last;
    })
    .catch((err) => {
      console.warn(">>> log_view.js pollLogs() error:", err);
    });
}
|
||||
|
||||
setInterval(pollLogs, 2800);
|
||||
|
||||
console.log(">>> log_view.js LOADED");
|
||||
@ -0,0 +1,98 @@
|
||||
/* ========================================================
|
||||
File: static/js/state_updater.js
|
||||
Purpose:
|
||||
- Poll /api/state/all
|
||||
- Dispatch merged state to subscribers
|
||||
(bookcard_controller, inspect_state, others)
|
||||
- Pause polling when tab inactive
|
||||
======================================================== */
|
||||
|
||||
console.log("[STATE-UPDATER] loaded");
|
||||
|
||||
const STATE_POLL_INTERVAL_MS = 2500;
|
||||
const STATE_ENDPOINT = "/api/state/all";
|
||||
|
||||
let STATE_TIMER = null;
|
||||
|
||||
/* ========================================================
|
||||
INIT
|
||||
======================================================== */
|
||||
|
||||
// Auto-start the state updater once the DOM is ready.
// NOTE(review): base.html ALSO calls initStateUpdater() on DOMContentLoaded,
// so init can run twice — initStateUpdater should be idempotent; verify.
document.addEventListener("DOMContentLoaded", () => {
  initStateUpdater();
});
|
||||
|
||||
/* Start the state poller when book cards are present on the page.
   BUGFIX: base.html invokes initStateUpdater() again on DOMContentLoaded,
   which previously registered a second visibilitychange listener; an
   idempotence guard now makes repeated calls harmless. */
function initStateUpdater() {
  if (initStateUpdater._initialized) return;

  const cards = document.querySelectorAll(".book-card");

  if (cards.length === 0) {
    console.log("[STATE-UPDATER] No bookcards found — skipping");
    return;
  }

  initStateUpdater._initialized = true;

  console.log(`[STATE-UPDATER] Starting updater for ${cards.length} bookcards`);

  startPolling(true);

  // Pause while the tab is hidden; resume (with an immediate poll) on return.
  document.addEventListener("visibilitychange", () => {
    document.hidden ? stopPolling() : startPolling(true);
  });
}
|
||||
|
||||
/* ========================================================
|
||||
DISPATCH
|
||||
======================================================== */
|
||||
|
||||
/* Fan merged state entries out to every subscriber. */
function dispatchState(entries) {
  console.debug("[STATE] dispatch", entries.length);

  // 1. Direct call into the bookcard controller when it is loaded.
  if (typeof window.updateBookCardsFromState === "function") {
    window.updateBookCardsFromState(entries);
  }

  // 2. Event for loosely-coupled subscribers (inspect_state tables, etc.).
  window.dispatchEvent(new CustomEvent("state:update", { detail: entries }));
}
|
||||
|
||||
/* ========================================================
|
||||
POLLING CONTROL
|
||||
======================================================== */
|
||||
|
||||
/* Begin the poll loop; no-op when a timer is already running.
   With `immediate`, one poll fires right away instead of after the
   first interval. */
function startPolling(immediate = false) {
  if (STATE_TIMER) return;

  console.log("[STATE-UPDATER] Start polling");

  if (immediate) {
    pollState();
  }
  STATE_TIMER = setInterval(pollState, STATE_POLL_INTERVAL_MS);
}
|
||||
|
||||
/* Halt the poll loop; no-op when nothing is running. */
function stopPolling() {
  if (!STATE_TIMER) return;

  console.log("[STATE-UPDATER] Stop polling (tab inactive)");
  clearInterval(STATE_TIMER);
  STATE_TIMER = null;
}
|
||||
|
||||
/* ========================================================
|
||||
POLL API
|
||||
======================================================== */
|
||||
|
||||
/* Fetch /api/state/all and dispatch the result to subscribers.
   Skips silently while the tab is hidden or on bad responses. */
async function pollState() {
  if (document.hidden) return;

  try {
    const resp = await fetch(STATE_ENDPOINT, { cache: "no-store" });
    if (!resp.ok) return;

    const payload = await resp.json();
    if (!Array.isArray(payload)) return;

    dispatchState(payload);
  } catch (err) {
    console.error("[STATE-UPDATER] poll error", err);
  }
}
|
||||
@ -0,0 +1,35 @@
|
||||
<!-- File: templates/base.html -->
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>BookScraper</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<!-- CSS -->
|
||||
<link rel="stylesheet" href="/static/css/style.css" />
|
||||
<link rel="stylesheet" href="/static/css/dashboard.css" />
|
||||
</head>
|
||||
<body>
|
||||
<!-- Global Navigation -->
|
||||
{% include "components/nav.html" %}
|
||||
|
||||
<!-- Main Content Area -->
|
||||
<main class="container">{% block content %}{% endblock %}</main>
|
||||
|
||||
<!-- JS -->
|
||||
<script src="/static/js/app.js"></script>
|
||||
<script src="/static/js/log_view.js"></script>
|
||||
<script src="/static/js/dashboard.js"></script>
|
||||
|
||||
<!-- GLOBAL STATE UPDATER -->
|
||||
<script src="/static/js/state_updater.js"></script>
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
if (typeof initStateUpdater === "function") {
|
||||
initStateUpdater();
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@ -0,0 +1,66 @@
|
||||
<!-- =======================================================================
|
||||
File: templates/components/book_list_item.html
|
||||
Purpose:
|
||||
Dashboard weergave van één boek in de lijst.
|
||||
Variabelen komen binnen via:
|
||||
book.<veld>
|
||||
→ Boek gebruikt nu uitsluitend book_idx als primaire sleutel
|
||||
======================================================================= -->
|
||||
|
||||
<div class="book-list-item" data-book-idx="{{ book.book_idx }}">
|
||||
<!-- Left area: title + metadata -->
|
||||
<div class="book-info">
|
||||
<div class="book-title">{{ book.title }}</div>
|
||||
|
||||
<div class="book-meta">
|
||||
<span class="meta-label">IDX:</span> {{ book.book_idx }} {% if
|
||||
book.last_update %}
|
||||
<span class="meta-separator">•</span>
|
||||
<span class="meta-label">Updated:</span> {{ book.last_update }} {% endif
|
||||
%}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Center area: Status -->
|
||||
<div class="book-status">
|
||||
<span class="status-badge status-{{ book.status|lower }}">
|
||||
{{ book.status|capitalize }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<!-- Right area: progress mini-bars -->
|
||||
<div class="book-progress-mini">
|
||||
<!-- Download progress -->
|
||||
<div class="progress-mini-row">
|
||||
<span class="mini-label">DL:</span>
|
||||
|
||||
{% set pct_dl = 0 %} {% if book.download_total > 0 %} {% set pct_dl = (100
|
||||
* book.download_done / book.download_total) | round(0) %} {% endif %}
|
||||
|
||||
<div class="progress-mini-bar">
|
||||
<div class="fill" style="width: {{ pct_dl }}%;"></div>
|
||||
</div>
|
||||
<span class="mini-value">{{ pct_dl }}%</span>
|
||||
</div>
|
||||
|
||||
<!-- Audio progress -->
|
||||
<div class="progress-mini-row">
|
||||
<span class="mini-label">AU:</span>
|
||||
|
||||
{% set pct_au = 0 %} {% if book.audio_total > 0 %} {% set pct_au = (100 *
|
||||
book.audio_done / book.audio_total) | round(0) %} {% endif %}
|
||||
|
||||
<div class="progress-mini-bar audio">
|
||||
<div class="fill audio-fill" style="width: {{ pct_au }}%;"></div>
|
||||
</div>
|
||||
<span class="mini-value">{{ pct_au }}%</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Abort button -->
|
||||
<div class="book-abort-area">
|
||||
<button class="abort-btn" onclick="abortBookAjax('{{ book.book_idx }}')">
|
||||
Abort
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
@ -0,0 +1,90 @@
|
||||
{# ============================================================ File:
|
||||
templates/components/bookcard.html Purpose: Eén enkele boekkaart (dumb
|
||||
component) ============================================================ #}
|
||||
|
||||
<div class="book-card {{ b.status }}" data-book-idx="{{ b.book_idx }}">
|
||||
<!-- HIDE -->
|
||||
<form
|
||||
action="/hide/{{ b.book_idx }}"
|
||||
method="POST"
|
||||
class="hide-form"
|
||||
onsubmit="return confirm('Dit boek verbergen?')"
|
||||
>
|
||||
<button class="icon-btn icon-hide" title="Verbergen">
|
||||
<i class="fa-solid fa-xmark"></i>
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<!-- COVER -->
|
||||
<div class="book-cover">
|
||||
{% if b.cover_path %}
|
||||
<img
|
||||
src="/{{ b.cover_path }}"
|
||||
class="book-img"
|
||||
data-field="cover"
|
||||
alt="cover"
|
||||
/>
|
||||
{% else %}
|
||||
<div class="book-img placeholder" data-field="cover">?</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- META -->
|
||||
<div class="book-meta">
|
||||
<!-- STATUS BADGE -->
|
||||
{% if b.status %}
|
||||
<span
|
||||
class="status-badge status-{{ b.status }}"
|
||||
title="
|
||||
{% if b.status == 'done' %}Alle chapters en audio zijn compleet{% endif %}
|
||||
{% if b.status == 'audio' %}Downloads compleet, audio wordt nog gegenereerd{% endif %}
|
||||
{% if b.status == 'downloading' %}Bezig met downloaden{% endif %}
|
||||
"
|
||||
>
|
||||
{{ b.status | upper }}
|
||||
</span>
|
||||
{% endif %}
|
||||
|
||||
<div class="book-title" data-field="title">{{ b.title }}</div>
|
||||
<div class="book-author" data-field="author">{{ b.author }}</div>
|
||||
<div class="book-created">
|
||||
Geregistreerd: <span data-field="created_at">{{ b.created_at }}</span>
|
||||
</div>
|
||||
|
||||
<!-- ACTIONS -->
|
||||
<div class="book-actions">
|
||||
<!-- START -->
|
||||
<form action="/start" method="POST">
|
||||
<input type="hidden" name="book_idx" value="{{ b.book_idx }}" />
|
||||
<button class="icon-btn icon-start" title="Start" data-action="start">
|
||||
<i class="fa-solid fa-play"></i>
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<!-- ABORT -->
|
||||
<form action="/abort/{{ b.book_idx }}" method="POST">
|
||||
<input type="hidden" name="book_idx" value="{{ b.book_idx }}" />
|
||||
<button class="icon-btn icon-abort" title="Abort" data-action="abort">
|
||||
<i class="fa-solid fa-stop"></i>
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<form
|
||||
method="post"
|
||||
action="/inspect/statuscheck/{{ b.book_idx }}"
|
||||
style="display: inline-block"
|
||||
>
|
||||
<button
|
||||
type="submit"
|
||||
class="icon-btn icon-statuscheck"
|
||||
title="Herbereken status op basis van bestanden"
|
||||
>
|
||||
<i class="fa-solid fa-magnifying-glass-chart"></i>
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- PROGRESS -->
|
||||
<div class="book-progress">{% include "components/progress_box.html" %}</div>
|
||||
</div>
|
||||
@ -0,0 +1,44 @@
|
||||
<!-- =======================================================================
|
||||
File: templates/components/log_view.html
|
||||
Purpose: Reusable log viewer component for any page (dashboard/start/book)
|
||||
Notes:
|
||||
- Requires JS: /static/js/log_view.js
|
||||
- Supports filtering by tag (e.g. [DL], [PARSE], [AUDIO], [CTRL], ...)
|
||||
- Template expects optional variable `logs` (list[str])
|
||||
======================================================================= -->
|
||||
|
||||
<div id="log-viewer" class="log-viewer">
|
||||
<!-- ========================== HEADER ========================== -->
|
||||
<div class="log-header">
|
||||
<h2>Live Log</h2>
|
||||
|
||||
<div class="log-filters">
|
||||
<label for="log-filter">Filter:</label>
|
||||
|
||||
<select id="log-filter">
|
||||
<option value="ALL">All</option>
|
||||
<option value="[DL]">Download</option>
|
||||
<option value="[PARSE]">Parse</option>
|
||||
<option value="[SAVE]">Save</option>
|
||||
<option value="[AUDIO]">Audio</option>
|
||||
<option value="[CTRL]">Controller</option>
|
||||
<option value="[SCRAPING]">Scraping</option>
|
||||
<option value="[BOOK]">Book</option>
|
||||
<option value="[ERROR]">Errors</option>
|
||||
</select>
|
||||
|
||||
<button id="log-clear" class="btn-small">Clear</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ========================== OUTPUT ========================== -->
|
||||
<div id="log-output" class="log-output">
|
||||
{% if logs and logs|length > 0 %} {% for line in logs %}
|
||||
<div class="log-line">{{ line }}</div>
|
||||
{% endfor %} {% else %}
|
||||
<div class="log-empty">No logs yet…</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/log_view.js"></script>
|
||||
@ -0,0 +1,40 @@
|
||||
<!-- =======================================================================
|
||||
File: templates/components/nav.html
|
||||
Purpose: Global navigation bar for BookScraper UI (improved version)
|
||||
======================================================================= -->
|
||||
|
||||
<nav class="navbar">
|
||||
<div class="nav-inner">
|
||||
<!-- Branding / Home -->
|
||||
<div class="nav-brand">
|
||||
<a href="/">BookScraper</a>
|
||||
</div>
|
||||
|
||||
<!-- Main navigation -->
|
||||
<ul class="nav-links">
|
||||
<li>
|
||||
<a href="/dashboard" class="nav-item"> Dashboard </a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<a href="/api/books" class="nav-item"> Active Books </a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<a href="/debug/inspect_state" class="nav-item"> State overview </a>
|
||||
</li>
|
||||
|
||||
<!-- Tools dropdown -->
|
||||
<li class="nav-dropdown">
|
||||
<span class="nav-item">Tools ▾</span>
|
||||
<ul class="dropdown-menu">
|
||||
<li><a href="/api/db/books">DB Viewer</a></li>
|
||||
<li><a href="/debug/inspect_state">Inspect State</a></li>
|
||||
<li><a href="/debug/sync_state">Sync State</a></li>
|
||||
<li><a href="/debug/redis-keys">Redis Keys</a></li>
|
||||
<li><a href="/debug/queues">Queues</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
@ -0,0 +1,34 @@
|
||||
<!-- =======================================================================
|
||||
File: templates/components/progress_box.html
|
||||
Purpose:
|
||||
Dumb progress UI for a book card.
|
||||
Initial values via Jinja, live updates via state_updater.js
|
||||
======================================================================= -->
|
||||
|
||||
<div class="progress-box">
|
||||
<!-- DOWNLOAD -->
|
||||
<div class="progress-row">
|
||||
<div class="progress-label">Download</div>
|
||||
<div class="progressbar">
|
||||
<div
|
||||
class="progressbar-fill download"
|
||||
data-field="download_pct"
|
||||
style="width: 0%"
|
||||
></div>
|
||||
<div class="progressbar-text" data-field="download_text">0 / 0</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- AUDIO -->
|
||||
<div class="progress-row">
|
||||
<div class="progress-label">Audio</div>
|
||||
<div class="progressbar">
|
||||
<div
|
||||
class="progressbar-fill audio"
|
||||
data-field="audio_pct"
|
||||
style="width: 0%"
|
||||
></div>
|
||||
<div class="progressbar-text" data-field="audio_text">0 / 0</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -0,0 +1,21 @@
|
||||
{# ============================================================ File:
|
||||
templates/components/registered_books.html Purpose: Toon een grid van
|
||||
geregistreerde boeken. Elke kaart wordt gerenderd via bookcard.html.
|
||||
============================================================ #}
|
||||
|
||||
<section class="dashboard-section">
|
||||
<h2>Geregistreerde boeken</h2>
|
||||
|
||||
{% if registered and registered|length > 0 %}
|
||||
|
||||
<div class="registered-grid">
|
||||
{% for b in registered %} {% include "components/bookcard.html" %} {% endfor
|
||||
%}
|
||||
</div>
|
||||
|
||||
{% else %}
|
||||
|
||||
<p>Geen geregistreerde boeken.</p>
|
||||
|
||||
{% endif %}
|
||||
</section>
|
||||
@ -0,0 +1,38 @@
|
||||
<!-- =======================================================================
|
||||
File: templates/dashboard/book_detail.html
|
||||
Purpose:
|
||||
Detailpagina voor één book_idx.
|
||||
Toont progress (download/audio) + filters + live logs.
|
||||
======================================================================= -->
|
||||
|
||||
{% extends "layout.html" %} {% block content %}
|
||||
|
||||
<div class="dashboard-detail">
|
||||
<h1 class="page-title">{{ title }}</h1>
|
||||
<p class="breadcrumb">
|
||||
<a href="/dashboard">← Terug naar dashboard</a>
|
||||
</p>
|
||||
|
||||
<!-- Progress box -->
|
||||
<section id="progressSection">
|
||||
{% with book_idx=book_idx, title=title, download_total=download_total,
    download_done=download_done, audio_total=audio_total,
    audio_done=audio_done %}
  {% include "components/progress_box.html" %}
{% endwith %}
|
||||
</section>
|
||||
|
||||
<!-- Log view -->
|
||||
<section class="log-section">
|
||||
<h2>Live Log</h2>
|
||||
{% include "components/log_view.html" %}
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<!-- PAGE-SPECIFIC JS -->
|
||||
<script>
|
||||
const BOOK_IDX = "{{ book_idx }}";
|
||||
</script>
|
||||
|
||||
<script src="/static/js/log_view.js"></script>
|
||||
<script src="/static/js/dashboard.js"></script>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,46 @@
|
||||
{% extends "layout.html" %} {% block content %}
|
||||
|
||||
<div class="dashboard-container">
|
||||
<!-- =======================================================================
|
||||
File: templates/dashboard/dashboard.html
|
||||
Purpose:
|
||||
Functioneel dashboard:
|
||||
• Start nieuwe scrape via URL input component
|
||||
• Toont lijst van actieve boeken (actieve state model)
|
||||
• Toont globale live logs
|
||||
Vereist:
|
||||
- books: lijst van actieve boeken
|
||||
- logs: lijst van globale logs (optioneel)
|
||||
======================================================================= -->
|
||||
<!-- ===========================================================
|
||||
URL INPUT — Start nieuwe scrape
|
||||
=========================================================== -->
|
||||
<section class="dashboard-section">
|
||||
<h2>Start nieuwe scrape</h2>
|
||||
{% include "components/url_input.html" %}
|
||||
</section>
|
||||
|
||||
<hr />
|
||||
|
||||
<!-- ===========================================================
|
||||
BOOK LIST
|
||||
=========================================================== -->
|
||||
|
||||
{% include "components/registered_books.html" %}
|
||||
<hr />
|
||||
|
||||
<!-- ===========================================================
|
||||
GLOBAL LIVE LOG VIEW
|
||||
=========================================================== -->
|
||||
<section class="dashboard-section">
|
||||
<h2>Live log (globaal)</h2>
|
||||
|
||||
{# log_view verwacht altijd 'logs' — garandeer list #} {% set logs = logs or
|
||||
[] %} {% include "components/log_view.html" %}
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<!-- JS -->
|
||||
<script src="/static/js/dashboard.js"></script>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,95 @@
|
||||
{# ============================================================ File:
|
||||
templates/debug/inspect_state.html Purpose: Inspect SQLite vs Redis state per
|
||||
book_idx - Initial render via Jinja - Live updates via inspect_state.js -
|
||||
BookCard is server-rendered and NEVER replaced - Only the right-side state table
|
||||
is updated dynamically
|
||||
============================================================ #} {% extends
|
||||
"layout.html" %} {% block content %}
|
||||
|
||||
<h1>State Inspection (SQL vs Redis)</h1>
|
||||
|
||||
<style>
|
||||
.state-block {
|
||||
display: grid;
|
||||
grid-template-columns: 380px 1fr;
|
||||
gap: 20px;
|
||||
margin-bottom: 35px;
|
||||
padding: 18px;
|
||||
border: 1px solid #444;
|
||||
background: #222;
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.state-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
.state-table th,
|
||||
.state-table td {
|
||||
border: 1px solid #555;
|
||||
padding: 6px 10px;
|
||||
}
|
||||
|
||||
.state-table th {
|
||||
background: #333;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.state-table td {
|
||||
background: #2a2a2a;
|
||||
color: #ddd;
|
||||
}
|
||||
|
||||
.same {
|
||||
color: #9f9 !important;
|
||||
}
|
||||
|
||||
.diff {
|
||||
color: #ff7b7b !important;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div id="state-container">
|
||||
{% for entry in results %}
|
||||
<div class="state-block" data-book-idx="{{ entry.book_idx }}">
|
||||
<!-- LEFT: BookCard (server-rendered, NEVER replaced) -->
|
||||
<div>
|
||||
{% if entry.card %} {% with b = entry.card %} {% include
|
||||
"components/bookcard.html" %} {% endwith %} {% else %}
|
||||
<strong>{{ entry.book_idx }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- RIGHT: State table (updated by JS) -->
|
||||
<div>
|
||||
<table class="state-table">
|
||||
<tr>
|
||||
<th>Field</th>
|
||||
<th>SQLite</th>
|
||||
<th>Redis</th>
|
||||
<th>Merged</th>
|
||||
</tr>
|
||||
|
||||
{% set sql = entry.sqlite %} {% set redis = entry.redis %} {% set merged
|
||||
= entry.would_merge_to %} {% for field in [ "status", "chapters_total",
|
||||
"downloaded", "chapters_download_done", "chapters_download_skipped",
|
||||
"parsed", "chapters_parsed_done", "audio_done", "audio_skipped",
|
||||
"last_update" ] %}
|
||||
<tr>
|
||||
<th>{{ field }}</th>
|
||||
<td>{{ sql.get(field, "") }}</td>
|
||||
<td>{{ redis.get(field, "") }}</td>
|
||||
<td>{{ merged.get(field, "") }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{% endblock %} {% block scripts %}
|
||||
<script src="/static/js/inspect_state.js"></script>
|
||||
{% endblock %}
|
||||
@ -0,0 +1,91 @@
|
||||
{% extends "layout.html" %} {% block content %}
|
||||
<h1>Celery Queue Debug</h1>
|
||||
|
||||
<style>
|
||||
.debug-section {
|
||||
margin-bottom: 40px;
|
||||
}
|
||||
.debug-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.debug-table th,
|
||||
.debug-table td {
|
||||
border: 1px solid #444;
|
||||
padding: 6px 10px;
|
||||
}
|
||||
.debug-table th {
|
||||
background: #333;
|
||||
color: #fff;
|
||||
}
|
||||
pre {
|
||||
background: #1e1e1e;
|
||||
color: #ddd;
|
||||
padding: 10px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
code {
|
||||
color: #9cf;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="debug-section">
|
||||
<h2>Workers</h2>
|
||||
|
||||
<h3>Active Tasks</h3>
|
||||
<pre>{{ workers_active | tojson(indent=2) }}</pre>
|
||||
|
||||
<h3>Reserved</h3>
|
||||
<pre>{{ workers_reserved | tojson(indent=2) }}</pre>
|
||||
|
||||
<h3>Scheduled</h3>
|
||||
<pre>{{ workers_scheduled | tojson(indent=2) }}</pre>
|
||||
</div>
|
||||
|
||||
<hr />
|
||||
|
||||
<div class="debug-section">
|
||||
<h2>Queues</h2>
|
||||
|
||||
{% for q in queues %}
|
||||
<div class="debug-queue">
|
||||
<h3>{{ q.name }} ({{ q.length }} items)</h3>
|
||||
|
||||
<table class="debug-table">
|
||||
<tr>
|
||||
<th>Redis Key</th>
|
||||
<td>{{ q.redis_key }}</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<th>Length</th>
|
||||
<td>{{ q.length }}</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<th>Items (first 30)</th>
|
||||
<td>
|
||||
{% if q["items"] %}
|
||||
<ul style="margin: 0; padding-left: 20px">
|
||||
{% for item in q["items"] %}
|
||||
<li><code>{{ item | e }}</code></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<i>No items</i>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
setInterval(() => {
|
||||
window.location.reload();
|
||||
}, 5000);
|
||||
</script>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,23 @@
|
||||
<!-- =======================================================================
|
||||
File: templates/home.html
|
||||
Purpose:
|
||||
New landing page for starting a scrape.
|
||||
Does NOT replace existing index.html.
|
||||
Uses reusable components (url_input).
|
||||
Redirects to /start?url=...
|
||||
======================================================================= -->
|
||||
|
||||
{% extends "layout.html" %} {% block content %}
|
||||
|
||||
<div class="landing-container">
|
||||
<h1 class="landing-title">Start a New Book Scrape</h1>
|
||||
|
||||
<!-- Reusable URL input component -->
|
||||
{% include "components/url_input.html" %}
|
||||
|
||||
<div class="landing-links">
|
||||
<a href="/dashboard">→ Go to Dashboard</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
@ -1,34 +1,53 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="nl">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>BookScraper</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; padding: 40px; max-width: 600px; margin: auto; }
|
||||
h1 { margin-bottom: 20px; }
|
||||
input[type="text"] {
|
||||
width: 100%; padding: 12px; font-size: 16px;
|
||||
border: 1px solid #ccc; border-radius: 6px;
|
||||
}
|
||||
button {
|
||||
margin-top: 20px;
|
||||
padding: 12px 20px;
|
||||
background: #007bff; color: white;
|
||||
border: none; border-radius: 6px;
|
||||
font-size: 16px; cursor: pointer;
|
||||
}
|
||||
button:hover { background: #0056b3; }
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
padding: 40px;
|
||||
max-width: 600px;
|
||||
margin: auto;
|
||||
}
|
||||
h1 {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
input[type="text"] {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
font-size: 16px;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 6px;
|
||||
}
|
||||
button {
|
||||
margin-top: 20px;
|
||||
padding: 12px 20px;
|
||||
background: #007bff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 16px;
|
||||
cursor: pointer;
|
||||
}
|
||||
button:hover {
|
||||
background: #0056b3;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
</head>
|
||||
<body>
|
||||
<h1>BookScraper WebGUI</h1>
|
||||
|
||||
<h1>BookScraper WebGUI</h1>
|
||||
|
||||
<form action="/start" method="POST">
|
||||
<label for="url">Geef een boek-URL op:</label><br><br>
|
||||
<input type="text" id="url" name="url" placeholder="https://example.com/book/12345" required>
|
||||
<button type="submit">Start Scraping</button>
|
||||
</form>
|
||||
|
||||
</body>
|
||||
<form action="/init" method="POST">
|
||||
<label for="url">Geef een boek-URL op:</label><br /><br />
|
||||
<input
|
||||
type="text"
|
||||
id="url"
|
||||
name="url"
|
||||
placeholder="https://example.com/book/12345"
|
||||
required
|
||||
/>
|
||||
<button type="submit">Start Scraping</button>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@ -0,0 +1,15 @@
|
||||
<h2>{{ title }}</h2>
|
||||
|
||||
<div>
|
||||
<button id="abortBtn">Abort Download</button>
|
||||
</div>
|
||||
|
||||
<div id="progressBox">
|
||||
<div id="bar"></div>
|
||||
</div>
|
||||
|
||||
<pre id="logBox">Loading…</pre>
|
||||
|
||||
<script>
|
||||
// Poll elke seconde
|
||||
</script>
|
||||
@ -0,0 +1,13 @@
|
||||
#!/bin/sh
|
||||
# mp4info shim for m4b-tool (ffprobe-based)
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
echo "Usage: mp4info <file>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ffprobe outputs float seconds; m4b-tool expects an integer
|
||||
ffprobe -v error \
|
||||
-show_entries format=duration \
|
||||
-of default=noprint_wrappers=1:nokey=1 \
|
||||
"$1" | awk '{ printf "%d\n", ($1 + 0.5) }'
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue