Compare commits
No commits in common. 'main' and 'feat/dashboard-upgrade' have entirely different histories.
@@ -1,320 +1,97 @@
|
||||
# ============================================================
|
||||
# File: db/repository.py
|
||||
# Purpose:
|
||||
# Unified façade for BookScraper database state.
|
||||
# High-level BookScraper database interface.
|
||||
# This is the ONLY module Celery tasks and Flask should use.
|
||||
#
|
||||
# Responsibilities:
|
||||
# - Route metadata → SQLite
|
||||
# - Route counters → Redis (live) + SQLite (snapshot)
|
||||
# - Provide a clean API for tasks and Flask UI
|
||||
# ============================================================
|
||||
# ============================================================
|
||||
# UPDATED — canonical read model via get_book_state
|
||||
# ============================================================
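# Illustrative sketch (not part of the module): how a Celery task or Flask view
# is expected to use this façade — all reads and writes go through db.repository,
# never through db.db, db.state_sql or db.state_redis directly. The book id and
# title below are hypothetical.
#
#   from db import repository
#
#   repository.register_book("12345", title="Some Book", book_url=url)
#   repository.set_chapters_total("12345", 250)
#   repository.inc_download_done("12345")          # live counter → Redis
#   state = repository.get_book_state("12345")     # merged SQL + Redis view
#   print(state["downloaded"], state["status"])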
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
from logbus.publisher import log
|
||||
|
||||
import redis
|
||||
import os
|
||||
|
||||
# ============================================================
|
||||
# SQL low-level engines (snapshot storage)
|
||||
# ============================================================
|
||||
from db.state_sql import (
|
||||
sql_fetch_book,
|
||||
sql_fetch_all_books,
|
||||
sql_set_status,
|
||||
sql_set_chapters_total,
|
||||
sql_register_book,
|
||||
sql_update_book,
|
||||
# Uses low-level primitives from db.db, but exposes
|
||||
# domain-level operations:
|
||||
# - fetch_book / fetch_all_books
|
||||
# - create_or_update_book
|
||||
# - set_status
|
||||
# - incrementing counters
|
||||
# ============================================================
|
||||
|
||||
from db.db import (
|
||||
upsert_book,
|
||||
_raw_get_book,
|
||||
_raw_get_all_books,
|
||||
)
|
||||
|
||||
# ============================================================
|
||||
# REDIS low-level engines (live counters)
|
||||
# ============================================================
|
||||
from db.state_redis import (
|
||||
redis_set_status,
|
||||
redis_set_chapters_total,
|
||||
redis_inc_download_done,
|
||||
redis_inc_download_skipped,
|
||||
redis_inc_parsed_done,
|
||||
redis_inc_audio_done,
|
||||
redis_inc_audio_skipped,
|
||||
)
|
||||
|
||||
# ============================================================
|
||||
# Redis client (read-only for legacy + guards)
|
||||
# ============================================================
|
||||
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
||||
_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
# ------------------------------------------------------------
|
||||
# FETCH OPERATIONS
|
||||
# ------------------------------------------------------------
|
||||
def fetch_book(book_id):
|
||||
"""Return a single book dict or None."""
|
||||
return _raw_get_book(book_id)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# LEGACY PROGRESS (UI only, unchanged)
|
||||
# ============================================================
|
||||
def _legacy_get_progress(book_idx):
|
||||
return {
|
||||
"book_idx": book_idx,
|
||||
"total": int(_r.get(f"progress:{book_idx}:total") or 0),
|
||||
"completed": int(_r.get(f"progress:{book_idx}:completed") or 0),
|
||||
"skipped": int(_r.get(f"progress:{book_idx}:skipped") or 0),
|
||||
"failed": int(_r.get(f"progress:{book_idx}:failed") or 0),
|
||||
"abort": _r.exists(f"abort:{book_idx}") == 1,
|
||||
"failed_list": _r.lrange(f"progress:{book_idx}:failed_list", 0, -1),
|
||||
}
|
||||
|
||||
|
||||
@logcall
|
||||
def get_progress(book_idx):
|
||||
return _legacy_get_progress(book_idx)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# FETCH (SQLite snapshot)
|
||||
# ============================================================
|
||||
@logcall
|
||||
def fetch_book(book_idx):
|
||||
return sql_fetch_book(book_idx)
|
||||
|
||||
|
||||
@logcall
|
||||
def fetch_all_books():
|
||||
return sql_fetch_all_books()
|
||||
"""Return all books ordered newest → oldest."""
|
||||
return _raw_get_all_books()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# INIT / UPDATE METADATA
|
||||
# ============================================================
|
||||
@logcall
|
||||
def register_book(
|
||||
book_idx,
|
||||
title,
|
||||
author=None,
|
||||
description=None,
|
||||
cover_url=None,
|
||||
cover_path=None,
|
||||
book_url=None,
|
||||
):
|
||||
sql_register_book(
|
||||
book_idx,
|
||||
{
|
||||
"book_idx": book_idx,
|
||||
"title": title,
|
||||
"author": author,
|
||||
"description": description,
|
||||
"cover_url": cover_url,
|
||||
"cover_path": cover_path,
|
||||
"book_url": book_url,
|
||||
"chapters_total": 0,
|
||||
"status": "registered",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@logcall
|
||||
def update_book_after_full_scrape(
|
||||
book_idx,
|
||||
# ------------------------------------------------------------
|
||||
# BOOK CREATION / METADATA
|
||||
# ------------------------------------------------------------
|
||||
def create_or_update_book(
|
||||
book_id,
|
||||
title=None,
|
||||
author=None,
|
||||
description=None,
|
||||
cover_url=None,
|
||||
chapters_total=None,
cover_path=None,
|
||||
status=None,
|
||||
):
|
||||
fields = {}
|
||||
|
||||
if title is not None:
|
||||
fields["title"] = title
|
||||
if author is not None:
|
||||
fields["author"] = author
|
||||
if description is not None:
|
||||
fields["description"] = description
|
||||
if cover_url is not None:
|
||||
fields["cover_url"] = cover_url
|
||||
if chapters_total is not None:
|
||||
fields["chapters_total"] = chapters_total
|
||||
|
||||
fields["status"] = "active"
|
||||
sql_update_book(book_idx, fields)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# STATUS
|
||||
# ============================================================
|
||||
@logcall
|
||||
def set_status(book_idx, status):
|
||||
redis_set_status(book_idx, status)
|
||||
sql_set_status(book_idx, status)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# TOTALS
|
||||
# ============================================================
|
||||
@logcall
|
||||
def set_chapters_total(book_idx, total):
|
||||
redis_set_chapters_total(book_idx, total)
|
||||
sql_set_chapters_total(book_idx, total)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# COUNTERS — WRITE ONLY
|
||||
# ============================================================
|
||||
@logcall
|
||||
def inc_download_done(book_idx, amount=1):
|
||||
redis_inc_download_done(book_idx, amount)
|
||||
|
||||
|
||||
@logcall
|
||||
def inc_download_skipped(book_idx, amount=1):
|
||||
redis_inc_download_skipped(book_idx, amount)
|
||||
|
||||
|
||||
@logcall
|
||||
def inc_parsed_done(book_idx, amount=1):
|
||||
redis_inc_parsed_done(book_idx, amount)
|
||||
|
||||
|
||||
@logcall
|
||||
def inc_audio_done(book_idx, amount=1):
|
||||
redis_inc_audio_done(book_idx, amount)
|
||||
|
||||
|
||||
@logcall
|
||||
def inc_audio_skipped(book_idx, amount=1):
|
||||
redis_inc_audio_skipped(book_idx, amount)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# CANONICAL READ MODEL
|
||||
# ============================================================
|
||||
@logcall
|
||||
def get_book_state(book_idx):
|
||||
"""
|
||||
Canonical merged read model.
|
||||
|
||||
Rules:
|
||||
- SQL = snapshot baseline
|
||||
- Redis = live counters
|
||||
- merged = max(sql, redis)
|
||||
- capped at chapters_total
|
||||
"""
|
||||
|
||||
sqlite_row = sql_fetch_book(book_idx) or {}
|
||||
redis_state = _r.hgetall(f"book:{book_idx}:state") or {}
|
||||
|
||||
def _int(v):
|
||||
try:
|
||||
return int(v)
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
chapters_total = _int(sqlite_row.get("chapters_total"))
|
||||
|
||||
# SQL snapshot
|
||||
sql_downloaded = _int(sqlite_row.get("downloaded"))
|
||||
sql_audio_done = _int(sqlite_row.get("audio_done"))
|
||||
sql_audio_skipped = _int(sqlite_row.get("audio_skipped"))
|
||||
|
||||
# Redis live
|
||||
redis_downloaded = _int(redis_state.get("chapters_download_done")) + _int(
|
||||
redis_state.get("chapters_download_skipped")
|
||||
)
|
||||
redis_audio_done = _int(redis_state.get("audio_done"))
|
||||
redis_audio_skipped = _int(redis_state.get("audio_skipped"))
|
||||
|
||||
# Merge
|
||||
merged_downloaded = max(sql_downloaded, redis_downloaded)
|
||||
merged_audio_done = max(sql_audio_done, redis_audio_done)
|
||||
merged_audio_skipped = max(sql_audio_skipped, redis_audio_skipped)
|
||||
|
||||
if chapters_total > 0:
|
||||
merged_downloaded = min(merged_downloaded, chapters_total)
|
||||
merged_audio_done = min(merged_audio_done, chapters_total)
|
||||
merged_audio_skipped = min(merged_audio_skipped, chapters_total)
|
||||
|
||||
audio_completed = merged_audio_done + merged_audio_skipped
|
||||
|
||||
# Build state
|
||||
state = dict(sqlite_row)
|
||||
state.update(
|
||||
{
|
||||
"downloaded": merged_downloaded,
|
||||
"audio_done": merged_audio_done,
|
||||
"audio_skipped": merged_audio_skipped,
|
||||
"chapters_total": chapters_total,
|
||||
}
|
||||
)
|
||||
|
||||
# Derived status
|
||||
status = sqlite_row.get("status") or "unknown"
|
||||
if chapters_total > 0:
|
||||
if merged_downloaded < chapters_total:
|
||||
status = "downloading"
|
||||
elif merged_downloaded == chapters_total and audio_completed < chapters_total:
|
||||
status = "audio"
|
||||
elif audio_completed >= chapters_total:
|
||||
status = "done"
|
||||
|
||||
state["status"] = status
|
||||
return state
|
||||
|
||||
|
||||
# ============================================================
|
||||
# READ HELPERS (VIA get_book_state ONLY)
|
||||
# ============================================================
|
||||
@logcall
|
||||
def get_chapters_total(book_idx):
|
||||
return int(get_book_state(book_idx).get("chapters_total", 0))
|
||||
|
||||
|
||||
@logcall
|
||||
def get_audio_done(book_idx):
|
||||
return int(get_book_state(book_idx).get("audio_done", 0))
|
||||
|
||||
|
||||
@logcall
|
||||
def get_audio_completed_total(book_idx):
|
||||
state = get_book_state(book_idx)
|
||||
return int(state.get("audio_done", 0)) + int(state.get("audio_skipped", 0))
|
||||
|
||||
|
||||
# ============================================================
|
||||
# STATUSCHECK GUARD (INTENTIONAL DIRECT REDIS)
|
||||
# ============================================================
|
||||
@logcall
|
||||
def try_trigger_statuscheck(book_idx):
|
||||
return bool(_r.set(f"book:{book_idx}:statuscheck:triggered", "1", nx=True))
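# Note: SET with nx=True only succeeds if the key does not exist yet, so the
# first caller gets True and every later caller gets False — a one-shot guard.
# No TTL is set here; clearing the key (e.g. redis-cli DEL) re-arms the trigger
# for that book.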
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ACTIVE / REGISTERED BOOK LISTS (UI API)
|
||||
# ============================================================
|
||||
@logcall
|
||||
def get_registered_books():
|
||||
"""
|
||||
Books visible in the 'registered' list in the UI.
|
||||
"""
|
||||
all_books = sql_fetch_all_books()
|
||||
HIDDEN_STATES = {"hidden"}
|
||||
return [b for b in all_books if b.get("status") not in HIDDEN_STATES]
|
||||
|
||||
|
||||
@logcall
|
||||
def get_active_books():
|
||||
"""
|
||||
Books currently active in the dashboard.
|
||||
"""
|
||||
all_books = sql_fetch_all_books()
|
||||
HIDDEN_STATES = {"hidden", "done"}
|
||||
return [b for b in all_books if b.get("status") not in HIDDEN_STATES]
|
||||
|
||||
|
||||
@logcall
|
||||
def store_m4b_error(book_idx: str, volume: str, error_text: str):
|
||||
"""
|
||||
Passive storage of m4b errors.
|
||||
No logic, no retries, no state transitions.
|
||||
"""
|
||||
key = f"book:{book_idx}:m4b:errors"
|
||||
entry = f"{volume}: {error_text}"
|
||||
|
||||
_r.rpush(key, entry)
|
||||
if cover_url is not None:
|
||||
fields["cover_url"] = cover_url
|
||||
if cover_path is not None:
|
||||
fields["cover_path"] = cover_path
|
||||
if status is not None:
|
||||
fields["status"] = status
|
||||
|
||||
if fields:
|
||||
upsert_book(book_id, **fields)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# STATUS MANAGEMENT
|
||||
# ------------------------------------------------------------
|
||||
def set_status(book_id, status):
|
||||
upsert_book(book_id, status=status)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INCREMENTING COUNTERS (atomic)
|
||||
# ------------------------------------------------------------
|
||||
def inc_downloaded(book_id, amount=1):
|
||||
book = _raw_get_book(book_id)
|
||||
if not book:
|
||||
return
|
||||
cur = book.get("downloaded", 0) or 0
|
||||
upsert_book(book_id, downloaded=cur + amount)
|
||||
|
||||
|
||||
def inc_parsed(book_id, amount=1):
|
||||
book = _raw_get_book(book_id)
|
||||
if not book:
|
||||
return
|
||||
cur = book.get("parsed", 0) or 0
|
||||
upsert_book(book_id, parsed=cur + amount)
|
||||
|
||||
|
||||
def inc_audio_done(book_id, amount=1):
|
||||
book = _raw_get_book(book_id)
|
||||
if not book:
|
||||
return
|
||||
cur = book.get("audio_done", 0) or 0
|
||||
upsert_book(book_id, audio_done=cur + amount)
|
||||
|
||||
@@ -1,130 +0,0 @@
|
||||
# ============================================================
|
||||
# File: db/state_redis.py (UPDATED for book_idx-only architecture)
|
||||
# Purpose:
|
||||
# Low-level Redis counters/state for BookScraper.
|
||||
# Used ONLY by db.repository façade.
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import time
|
||||
import redis
|
||||
|
||||
from logbus.publisher import log
|
||||
|
||||
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
||||
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INTERNAL KEY BUILDER
|
||||
# ------------------------------------------------------------
|
||||
def _key(book_idx: str) -> str:
|
||||
return f"book:{book_idx}:state"
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# STATUS
|
||||
# ------------------------------------------------------------
|
||||
def redis_set_status(book_idx: str, status: str):
|
||||
log(f"[DB-REDIS] Setting status for {book_idx} to {status}")
|
||||
key = _key(book_idx)
|
||||
r.hset(key, "status", status)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# TOTAL CHAPTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_set_chapters_total(book_idx: str, total: int):
|
||||
key = _key(book_idx)
|
||||
r.hset(key, "chapters_total", total)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# DOWNLOAD COUNTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_inc_download_done(book_idx: str, amount: int = 1):
|
||||
log(f"[DB-REDIS] Incrementing download done for {book_idx} by {amount}")
|
||||
key = _key(book_idx)
|
||||
r.hincrby(key, "chapters_download_done", amount)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
def redis_inc_download_skipped(book_idx: str, amount: int = 1):
|
||||
log(f"[DB-REDIS] Incrementing download skipped for {book_idx} by {amount}")
|
||||
key = _key(book_idx)
|
||||
r.hincrby(key, "chapters_download_skipped", amount)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PARSE COUNTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_inc_parsed_done(book_idx: str, amount: int = 1):
|
||||
log(f"[DB-REDIS] Incrementing parsed done for {book_idx} by {amount}")
|
||||
key = _key(book_idx)
|
||||
r.hincrby(key, "chapters_parsed_done", amount)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# AUDIO COUNTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_inc_audio_done(book_idx: str, amount: int = 1):
|
||||
log(f"[DB-REDIS] Incrementing audio done for {book_idx} by {amount}")
|
||||
key = _key(book_idx)
|
||||
r.hincrby(key, "audio_done", amount)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
def redis_inc_audio_skipped(book_idx: str, amount: int = 1):
|
||||
log(f"[DB-REDIS] Incrementing audio skipped for {book_idx} by {amount}")
|
||||
key = _key(book_idx)
|
||||
r.hincrby(key, "audio_skipped", amount)
|
||||
r.hset(key, "last_update", int(time.time()))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INITIALISE BOOK STATE
|
||||
# ------------------------------------------------------------
|
||||
def init_book_state(book_idx: str, title: str, url: str, chapters_total: int):
|
||||
"""
|
||||
Initialises the complete Redis state for a new book.
NOTE:
- If a key already exists → do NOT reset it (preserve progress).
- Only missing fields are added.
|
||||
"""
|
||||
|
||||
key = f"book:{book_idx}:state"
|
||||
|
||||
# Already exists? Then we only fill in the missing fields.
|
||||
exists = r.exists(key)
|
||||
|
||||
pipeline = r.pipeline()
|
||||
|
||||
# Basic metadata
|
||||
pipeline.hsetnx(key, "book_id", book_idx)
|
||||
pipeline.hsetnx(key, "title", title or "")
|
||||
pipeline.hsetnx(key, "url", url or "")
|
||||
|
||||
# State
|
||||
pipeline.hsetnx(key, "status", "registered")
|
||||
|
||||
# Counters
|
||||
pipeline.hsetnx(key, "chapters_total", chapters_total)
|
||||
pipeline.hsetnx(key, "chapters_download_done", 0)
|
||||
pipeline.hsetnx(key, "chapters_download_skipped", 0)
|
||||
pipeline.hsetnx(key, "chapters_parsed_done", 0)
|
||||
pipeline.hsetnx(key, "audio_done", 0)
|
||||
pipeline.hsetnx(key, "audio_skipped", 0)
|
||||
|
||||
# Timestamp
|
||||
pipeline.hset(key, "last_update", int(time.time()))
|
||||
|
||||
pipeline.execute()
|
||||
|
||||
if exists:
|
||||
log(f"[DB-REDIS] init_book_state(): UPDATED existing state for {book_idx}")
|
||||
else:
|
||||
log(f"[DB-REDIS] init_book_state(): CREATED new state for {book_idx}")
|
||||
@@ -1,178 +0,0 @@
|
||||
# ============================================================
|
||||
# File: db/state_sql.py (UPDATED for book_idx-only architecture)
|
||||
# Purpose:
|
||||
# Low-level SQLite snapshot layer for BookScraper metadata.
|
||||
# Used ONLY through db.repository façade.
|
||||
# ============================================================
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
from logbus.publisher import log
|
||||
|
||||
# Must match db/db.py
|
||||
DB_PATH = os.getenv("BOOKSCRAPER_DB", "/app/data/books.db")
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INTERNAL HELPERS
|
||||
# ------------------------------------------------------------
|
||||
def _connect():
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# FETCH
|
||||
# ------------------------------------------------------------
|
||||
def sql_fetch_book(book_idx):
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("SELECT * FROM books WHERE book_idx = ?", (book_idx,))
|
||||
row = cur.fetchone()
|
||||
conn.close()
|
||||
return dict(row) if row else None
|
||||
|
||||
|
||||
def sql_fetch_all_books():
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("SELECT * FROM books ORDER BY created_at DESC")
|
||||
rows = cur.fetchall()
|
||||
conn.close()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# REGISTER / UPDATE
|
||||
# ------------------------------------------------------------
|
||||
def sql_register_book(book_idx, fields: dict):
|
||||
"""
|
||||
Insert or replace entire book record.
|
||||
book_idx is the PRIMARY KEY.
|
||||
"""
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
cols = ", ".join(["book_idx"] + list(fields.keys()))
|
||||
placeholders = ", ".join(["?"] * (1 + len(fields)))
|
||||
values = [book_idx] + list(fields.values())
|
||||
|
||||
cur.execute(
|
||||
f"INSERT OR REPLACE INTO books ({cols}) VALUES ({placeholders})",
|
||||
values,
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def sql_update_book(book_idx, fields: dict):
|
||||
if not fields:
|
||||
return
|
||||
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
set_clause = ", ".join([f"{k} = ?" for k in fields])
|
||||
params = list(fields.values()) + [book_idx]
|
||||
|
||||
cur.execute(
|
||||
f"UPDATE books SET {set_clause} WHERE book_idx = ?",
|
||||
params,
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# STATUS
|
||||
# ------------------------------------------------------------
|
||||
def sql_set_status(book_idx, status: str):
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"UPDATE books SET status = ? WHERE book_idx = ?",
|
||||
(status, book_idx),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# CHAPTER TOTAL (snapshot)
|
||||
# ------------------------------------------------------------
|
||||
def sql_set_chapters_total(book_idx, total: int):
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"UPDATE books SET chapters_total = ? WHERE book_idx = ?",
|
||||
(total, book_idx),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# COUNTERS (SNAPSHOT-ONLY)
|
||||
# ------------------------------------------------------------
|
||||
def sql_inc_downloaded(book_idx, amount=1):
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE books
|
||||
SET downloaded = COALESCE(downloaded,0) + ?
|
||||
WHERE book_idx = ?
|
||||
""",
|
||||
(amount, book_idx),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def sql_inc_parsed(book_idx, amount=1):
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE books
|
||||
SET parsed = COALESCE(parsed,0) + ?
|
||||
WHERE book_idx = ?
|
||||
""",
|
||||
(amount, book_idx),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def sql_inc_audio_done(book_idx, amount=1):
|
||||
log(f"[DB-SQL] Incrementing audio_done for {book_idx} by {amount}")
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE books
|
||||
SET audio_done = COALESCE(audio_done,0) + ?
|
||||
WHERE book_idx = ?
|
||||
""",
|
||||
(amount, book_idx),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def sql_inc_audio_skipped(book_idx, amount=1):
|
||||
log(f"[DB-SQL] Incrementing audio_skipped for {book_idx} by {amount}")
|
||||
conn = _connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE books
|
||||
SET audio_skipped = COALESCE(audio_skipped,0) + ?
|
||||
WHERE book_idx = ?
|
||||
""",
|
||||
(amount, book_idx),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -1,70 +0,0 @@
|
||||
FROM debian:12
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# System + PHP (PHP 8.2 native)
|
||||
# ----------------------------------------------------------
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ffmpeg \
|
||||
curl \
|
||||
ca-certificates \
|
||||
bash \
|
||||
php-cli \
|
||||
php-intl \
|
||||
php-json \
|
||||
php-mbstring \
|
||||
php-xml \
|
||||
php-curl \
|
||||
php-zip \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-venv \
|
||||
\
|
||||
# build deps for mp4v2
|
||||
git \
|
||||
build-essential \
|
||||
autoconf \
|
||||
automake \
|
||||
libtool \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# Python venv (PEP 668 compliant)
|
||||
# ----------------------------------------------------------
|
||||
RUN python3 -m venv /opt/venv
|
||||
ENV PATH="/opt/venv/bin:/usr/local/bin:$PATH"
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# Build & install mp4v2 (mp4info)
|
||||
# ----------------------------------------------------------
|
||||
WORKDIR /tmp
|
||||
|
||||
RUN git clone https://github.com/sandreas/mp4v2 \
|
||||
&& cd mp4v2 \
|
||||
&& ./configure \
|
||||
&& make -j$(nproc) \
|
||||
&& make install \
|
||||
&& echo "/usr/local/lib" > /etc/ld.so.conf.d/mp4v2.conf \
|
||||
&& ldconfig \
|
||||
&& cd / \
|
||||
&& rm -rf /tmp/mp4v2
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# Install m4b-tool
|
||||
# ----------------------------------------------------------
|
||||
RUN curl -L https://github.com/sandreas/m4b-tool/releases/latest/download/m4b-tool.phar \
|
||||
-o /usr/local/bin/m4b-tool \
|
||||
&& chmod +x /usr/local/bin/m4b-tool
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# App
|
||||
# ----------------------------------------------------------
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . /app
|
||||
|
||||
CMD ["bash"]
|
||||
@@ -1 +0,0 @@
|
||||
Subproject commit 480a73324f53d0d24bea4931c3902097f8e2a663
|
||||
Binary file not shown.
@@ -1,27 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/engine/fetcher.py
|
||||
# Purpose:
|
||||
# Low-level HTML fetch utility shared by all site scrapers.
|
||||
# Replaces scattered _fetch() logic inside BookScraper.
|
||||
# ============================================================
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:118.0) "
|
||||
"Gecko/20100101 Firefox/118.0"
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
def fetch_html(url: str, encoding: str = "utf-8", timeout: int = 10) -> BeautifulSoup:
|
||||
"""
|
||||
Fetch HTML with a consistent user-agent and encoding.
|
||||
Returns BeautifulSoup(lxml).
|
||||
"""
|
||||
resp = requests.get(url, headers=HEADERS, timeout=timeout)
|
||||
resp.encoding = encoding
|
||||
return BeautifulSoup(resp.text, "lxml")
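# Illustrative usage (URL and selector are hypothetical):
#   soup = fetch_html("https://example.com/book/1.html", encoding="gbk")
#   title = soup.select_one("h1").get_text(strip=True)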
|
||||
@@ -1,65 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/engine/parser.py
|
||||
# Purpose:
|
||||
# High-level scraping API coordinating metadata extraction
|
||||
# and chapter extraction using pluggable SiteScraper classes.
|
||||
#
|
||||
# This is the new central engine:
|
||||
# - extract_metadata_only() used by INIT flow
|
||||
# - extract_metadata_full() used by full scraping pipeline
|
||||
# ============================================================
|
||||
|
||||
from scraper.engine.fetcher import fetch_html
|
||||
|
||||
|
||||
def extract_metadata_only(url: str, site_scraper):
|
||||
"""
|
||||
Extract ONLY lightweight metadata:
|
||||
- title
|
||||
- author
|
||||
- description
|
||||
- cover_url
|
||||
- chapters_total = 0
|
||||
"""
|
||||
soup = fetch_html(url, site_scraper.encoding)
|
||||
|
||||
title = site_scraper.parse_title(soup)
|
||||
author = site_scraper.parse_author(soup)
|
||||
description = site_scraper.parse_description(soup)
|
||||
cover_url = site_scraper.parse_cover(soup, url)
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"author": author,
|
||||
"description": description,
|
||||
"cover_url": cover_url,
|
||||
"chapters_total": 0,
|
||||
"book_url": url,
|
||||
}
|
||||
|
||||
|
||||
def extract_metadata_full(url: str, site_scraper):
|
||||
"""
|
||||
Full scraping (metadata + chapterlist).
|
||||
Used by the scraping Celery pipeline.
|
||||
"""
|
||||
soup = fetch_html(url, site_scraper.encoding)
|
||||
|
||||
# metadata
|
||||
meta = extract_metadata_only(url, site_scraper)
|
||||
|
||||
# chapter list
|
||||
chapter_page_url = site_scraper.extract_chapter_page_url(soup)
|
||||
chapter_page_soup = fetch_html(chapter_page_url, site_scraper.encoding)
|
||||
chapters = site_scraper.parse_chapter_list(chapter_page_soup)
|
||||
|
||||
meta["chapters"] = chapters
|
||||
return meta
|
||||
|
||||
|
||||
def build_book_id(title: str) -> str:
|
||||
"""
|
||||
Canonical book_id generator.
|
||||
SCRAPE currently uses title as ID → preserve that behavior.
|
||||
"""
|
||||
return title
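# Illustrative flow (the scraper instance comes from scraper.sites.get_scraper_for_url;
# the URL is hypothetical):
#   site = get_scraper_for_url(url)
#   meta = extract_metadata_full(url, site)
#   book_id = build_book_id(meta["title"])
#   print(len(meta["chapters"]), "chapters found")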
|
||||
@@ -1,33 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/logger_decorators.py
|
||||
# Purpose: Function-call logging decorator
|
||||
# ============================================================
|
||||
|
||||
from functools import wraps
|
||||
from scraper.logger import log_debug
|
||||
|
||||
|
||||
def logcall(func):
|
||||
"""
|
||||
Decorator: log function name + arguments every time it's called.
|
||||
Usage: @logcall above any function.
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
# Name of the function
|
||||
name = func.__qualname__
|
||||
|
||||
# First log line, before execution
|
||||
# log_debug(f"[CALL] {name} args={args} kwargs={kwargs}")
|
||||
log_debug(f"[CALL] {name} args={args}")
|
||||
# log_debug(f"[CALL] {name}")
|
||||
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
# Log after execution
|
||||
# log_debug(f"[RETURN] {name} → {result}")
|
||||
|
||||
return result
|
||||
|
||||
return wrapper
|
||||
@@ -1,94 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/audio_completion.py
|
||||
# Purpose:
|
||||
# Orchestration hook after audio completion.
|
||||
#
|
||||
# Rules (STRICT):
|
||||
# - ALWAYS read via get_book_state()
|
||||
# - Use ONLY merged counters from repository
|
||||
# - NO usage of derived status field
|
||||
# - Completion rule:
|
||||
# audio_completed < chapters_total → NOT DONE
|
||||
# ============================================================
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.repository import (
|
||||
get_book_state,
|
||||
try_trigger_statuscheck,
|
||||
)
|
||||
|
||||
from scraper.services.status_check_service import StatusCheckService
|
||||
from scraper.tasks.m4b_tasks import queue_m4b_for_book
|
||||
|
||||
|
||||
@logcall
|
||||
def trigger_audio_completion_check(book_idx: str):
|
||||
"""
|
||||
Called after inc_audio_done() OR inc_audio_skipped().
|
||||
|
||||
Flow:
|
||||
1. Fetch canonical merged state from repository
|
||||
2. Evaluate completion via merged counters ONLY
|
||||
3. Run filesystem validation (authoritative)
|
||||
4. Apply idempotency guard
|
||||
5. Queue m4b exactly once
|
||||
"""
|
||||
|
||||
try:
|
||||
# ----------------------------------------------------
|
||||
# STEP 1 — CANONICAL MERGED STATE
|
||||
# ----------------------------------------------------
|
||||
state = get_book_state(book_idx)
|
||||
|
||||
chapters_total = int(state.get("chapters_total", 0))
|
||||
audio_done = int(state.get("audio_done", 0))
|
||||
audio_skipped = int(state.get("audio_skipped", 0))
|
||||
audio_completed = audio_done + audio_skipped
|
||||
|
||||
log(
|
||||
f"[AUDIO-COMPLETION] book={book_idx} "
|
||||
f"audio_completed={audio_completed} chapters_total={chapters_total}"
|
||||
)
|
||||
|
||||
# ----------------------------------------------------
|
||||
# STEP 2 — FAST REJECT (MERGED COUNTERS ONLY)
|
||||
# ----------------------------------------------------
|
||||
if chapters_total <= 0 or audio_completed < chapters_total:
|
||||
log(f"[AUDIO-COMPLETION] not yet complete for book={book_idx}")
|
||||
return
|
||||
|
||||
# ----------------------------------------------------
|
||||
# STEP 3 — FILESYSTEM VALIDATION (AUTHORITATIVE)
|
||||
# ----------------------------------------------------
|
||||
result = StatusCheckService.run(book_idx)
|
||||
fs = result.get("filesystem", {})
|
||||
|
||||
audio_files = fs.get("audio_files", 0)
|
||||
chapters_txt = fs.get("chapters_txt", 0)
|
||||
effective_audio = audio_files + audio_skipped
|
||||
|
||||
if effective_audio < chapters_txt:
|
||||
log(
|
||||
f"[AUDIO-COMPLETION] FS validation failed "
|
||||
f"(audio_files={audio_files}, skipped={audio_skipped}, txt={chapters_txt})"
|
||||
)
|
||||
return
|
||||
|
||||
# ----------------------------------------------------
|
||||
# STEP 4 — IDEMPOTENCY GUARD (AFTER FS CONFIRMATION)
|
||||
# ----------------------------------------------------
|
||||
if not try_trigger_statuscheck(book_idx):
|
||||
log(f"[AUDIO-COMPLETION] statuscheck already triggered for {book_idx}")
|
||||
return
|
||||
|
||||
# ----------------------------------------------------
|
||||
# STEP 5 — FINAL ACTION
|
||||
# ----------------------------------------------------
|
||||
log(f"[AUDIO-COMPLETION] DONE → queue m4b for book={book_idx}")
|
||||
queue_m4b_for_book(book_idx)
|
||||
|
||||
except Exception as exc:
|
||||
# MUST NEVER break audio workers
|
||||
log(f"[AUDIO-COMPLETION][ERROR] book={book_idx} error={exc}")
|
||||
@@ -1,45 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/cover_service.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import requests
|
||||
from logbus.publisher import log
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class CoverService:
|
||||
|
||||
@staticmethod
|
||||
def download_main_cover(cover_url: str, book_id: str) -> Optional[str]:
|
||||
"""
|
||||
Downloads cover image into: static/covers/<book_id>.jpg.
|
||||
Returns local path or None.
|
||||
"""
|
||||
|
||||
if not cover_url:
|
||||
log(f"[COVER] No cover URL for book={book_id}")
|
||||
return None
|
||||
|
||||
static_dir = os.path.join("static", "covers")
|
||||
os.makedirs(static_dir, exist_ok=True)
|
||||
|
||||
dst_path = os.path.join(static_dir, f"{book_id}.jpg")
|
||||
|
||||
try:
|
||||
log(f"[COVER] Downloading: {cover_url}")
|
||||
|
||||
resp = requests.get(
|
||||
cover_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"}
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
with open(dst_path, "wb") as f:
|
||||
f.write(resp.content)
|
||||
|
||||
log(f"[COVER] Stored: {dst_path}")
|
||||
return dst_path
|
||||
|
||||
except Exception as e:
|
||||
log(f"[COVER] FAILED ({cover_url}) → {e}")
|
||||
return None
|
||||
@@ -1,95 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/init_service.py
|
||||
# Purpose:
|
||||
# Orchestrate INIT-flow:
|
||||
# - resolve site
|
||||
# - fetch minimal metadata
|
||||
# - derive book_idx
|
||||
# - register in SQLite
|
||||
# - store main cover
|
||||
# ============================================================
|
||||
|
||||
import re
|
||||
from scraper.services.site_resolver import SiteResolver
|
||||
from scraper.services.scrape_engine import ScrapeEngine
|
||||
from scraper.services.cover_service import CoverService
|
||||
|
||||
from db.repository import register_book
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
|
||||
class InitService:
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# BOOK IDX DERIVATION
|
||||
# ------------------------------------------------------------
|
||||
@staticmethod
|
||||
@logcall
|
||||
def derive_book_id(url: str) -> str:
|
||||
"""
|
||||
PTWXZ URL format ends with /{id}.html.
|
||||
If no match → fallback to sanitized URL.
|
||||
|
||||
Returns:
|
||||
book_idx (string)
|
||||
"""
|
||||
m = re.search(r"/(\d+)\.html$", url)
|
||||
if m:
|
||||
return m.group(1)
|
||||
|
||||
# Fallback — ensures deterministic ID for unknown formats
|
||||
return url.replace("/", "_").replace(":", "_")
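# Example (hypothetical URL): "https://www.ptwxz.com/bookinfo/12/12345.html"
# matches the /{id}.html rule and yields book_idx "12345"; a URL without that
# suffix falls through to the sanitized-URL fallback.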
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# MAIN INIT FLOW
|
||||
# ------------------------------------------------------------
|
||||
@staticmethod
|
||||
@logcall
|
||||
def execute(url: str) -> dict:
|
||||
"""
|
||||
INIT entry point.
|
||||
Returns complete metadata + registration result.
|
||||
"""
|
||||
|
||||
# 1) Resolve site handler
|
||||
site = SiteResolver.resolve(url)
|
||||
|
||||
# 2) Create unified book_idx
|
||||
book_idx = InitService.derive_book_id(url)
|
||||
|
||||
# Some site objects historically expect .book_id — we support it but DO NOT rely on it.
|
||||
site.book_id = book_idx
|
||||
|
||||
# 3) Fetch initial metadata (title/author/description/cover)
|
||||
meta = ScrapeEngine.fetch_metadata_only(site, url)
|
||||
|
||||
title = meta.get("title") or "Unknown"
|
||||
author = meta.get("author")
|
||||
description = meta.get("description")
|
||||
cover_url = meta.get("cover_url")
|
||||
|
||||
# 4) Download & store main cover for UI
|
||||
cover_path = CoverService.download_main_cover(cover_url, book_idx)
|
||||
|
||||
# 5) Register in SQLite (book_idx is the SOLE primary ID)
|
||||
register_book(
|
||||
book_idx=book_idx,
|
||||
title=title,
|
||||
author=author,
|
||||
description=description,
|
||||
cover_url=cover_url,
|
||||
cover_path=cover_path,
|
||||
book_url=url,
|
||||
)
|
||||
|
||||
# 6) Return metadata for UI / API
|
||||
return {
|
||||
"book_idx": book_idx,
|
||||
"title": title,
|
||||
"author": author,
|
||||
"description": description,
|
||||
"cover_url": cover_url,
|
||||
"cover_path": cover_path,
|
||||
"status": "registered",
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/site_resolver.py
|
||||
# Purpose:
|
||||
# Determine which BookSite implementation applies for a given URL.
|
||||
# This keeps INIT-flow and SCRAPE-flow site-agnostic.
|
||||
# ============================================================
|
||||
|
||||
from scraper.sites import BookSite # current PTWXZ implementation
|
||||
|
||||
|
||||
class SiteResolver:
|
||||
"""
|
||||
Resolves the correct BookSite class based on URL.
|
||||
Currently only PTWXZ/Piaotian is supported.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def resolve(url: str):
|
||||
# Later: add more domain rules for other sources
|
||||
return BookSite()
|
||||
@@ -1,135 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/status_check_service.py
|
||||
# Purpose:
|
||||
# Manual, idempotent status check per book.
#
# Determines, based on the filesystem:
# - number of downloaded chapters (.txt)
# - number of generated audio files (.m4b)
#
# And writes this validated reality back to SQL.
#
# NOTE:
# - No Redis
# - No Celery
# - No status transitions
# - No pipeline logic
|
||||
# ============================================================
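# Assumed on-disk layout this check walks (file names are illustrative):
#
#   <BOOKSCRAPER_OUTPUT_DIR>/<title>/
#       Volume_1/
#           0001.txt, 0002.txt, ...      ← counted as chapters_txt
#           Audio/
#               0001.m4b, ...            ← counted as audio_files
#       Volume_2/
#           ...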
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.state_sql import sql_fetch_book, sql_update_book
|
||||
|
||||
|
||||
class StatusCheckService:
|
||||
"""
|
||||
Status check based on the filesystem.
|
||||
Single source of truth = disk.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
@logcall
|
||||
def run(book_idx: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Run the status check for a single book.
|
||||
|
||||
Returns an inspectable dict with:
- filesystem counts
|
||||
- SQL before / after snapshot
|
||||
"""
|
||||
|
||||
# ----------------------------------------------------
|
||||
# 1. SQL fetch (does the book exist?)
|
||||
# ----------------------------------------------------
|
||||
sql_before = sql_fetch_book(book_idx)
|
||||
|
||||
if not sql_before:
|
||||
raise ValueError(f"[STATUSCHECK] Book not found in SQL: {book_idx}")
|
||||
|
||||
# ----------------------------------------------------
|
||||
# 2. Determine filesystem root
|
||||
# ----------------------------------------------------
|
||||
output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
|
||||
title = sql_before.get("title")
|
||||
book_dir = os.path.join(output_root, title)
|
||||
|
||||
if not os.path.isdir(book_dir):
|
||||
log(
|
||||
f"[STATUSCHECK] No output directory for book_idx={book_idx} : title='{title}')"
|
||||
)
|
||||
chapters_txt = 0
|
||||
audio_files = 0
|
||||
volumes = 0
|
||||
else:
|
||||
chapters_txt = 0
|
||||
audio_files = 0
|
||||
volumes = 0
|
||||
|
||||
# ------------------------------------------------
|
||||
# 3. Scan volumes
|
||||
# ------------------------------------------------
|
||||
for entry in os.listdir(book_dir):
|
||||
if not entry.lower().startswith("volume_"):
|
||||
continue
|
||||
|
||||
volumes += 1
|
||||
volume_path = os.path.join(book_dir, entry)
|
||||
|
||||
if not os.path.isdir(volume_path):
|
||||
continue
|
||||
|
||||
# ---- TXT chapters ----
|
||||
for fname in os.listdir(volume_path):
|
||||
if fname.lower().endswith(".txt"):
|
||||
chapters_txt += 1
|
||||
|
||||
# ---- Audio ----
|
||||
audio_dir = os.path.join(volume_path, "Audio")
|
||||
if os.path.isdir(audio_dir):
|
||||
for fname in os.listdir(audio_dir):
|
||||
if fname.lower().endswith(".m4b"):
|
||||
audio_files += 1
|
||||
|
||||
# ----------------------------------------------------
|
||||
# 4. SQL update (snapshot)
|
||||
# ----------------------------------------------------
|
||||
now = datetime.utcnow().isoformat(timespec="seconds")
|
||||
|
||||
update_fields = {
|
||||
"downloaded": chapters_txt,
|
||||
"audio_done": audio_files,
|
||||
"last_update": now,
|
||||
}
|
||||
|
||||
sql_update_book(book_idx, update_fields)
|
||||
|
||||
sql_after = sql_fetch_book(book_idx)
|
||||
|
||||
# ----------------------------------------------------
|
||||
# 5. Result for inspection/debugging
|
||||
# ----------------------------------------------------
|
||||
result = {
|
||||
"book_idx": book_idx,
|
||||
"filesystem": {
|
||||
"book_dir": book_dir,
|
||||
"exists": os.path.isdir(book_dir),
|
||||
"volumes": volumes,
|
||||
"chapters_txt": chapters_txt,
|
||||
"audio_files": audio_files,
|
||||
},
|
||||
"sql_before": sql_before,
|
||||
"sql_after": sql_after,
|
||||
"notes": [],
|
||||
}
|
||||
|
||||
log(
|
||||
f"[STATUSCHECK] book_idx={book_idx} "
|
||||
f"chapters={chapters_txt} audio={audio_files}"
|
||||
)
|
||||
|
||||
return result
|
||||
@@ -1,28 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/sites/__init__.py
|
||||
# Purpose:
|
||||
# Site autodetection based on URL.
|
||||
# ============================================================
|
||||
|
||||
from scraper.sites.piaotian import PiaotianScraper
|
||||
|
||||
|
||||
def get_scraper_for_url(url: str):
|
||||
"""
|
||||
Return the correct scraper instance for a given URL.
|
||||
Later: add more site implementations.
|
||||
"""
|
||||
if "ptwxz" in url or "piaotian" in url:
|
||||
return PiaotianScraper()
|
||||
|
||||
raise ValueError(f"No scraper available for URL: {url}")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Backwards-compatibility export for legacy BookScraper
|
||||
# ============================================================
|
||||
# Old code expects:
|
||||
# from scraper.sites import BookSite
|
||||
# We map that to our new PiaotianScraper implementation.
|
||||
|
||||
BookSite = PiaotianScraper
|
||||
@@ -1,52 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/sites/base.py
|
||||
# Purpose:
|
||||
# Abstract interface that every site-specific scraper must implement.
|
||||
# ============================================================
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SiteScraper(ABC):
|
||||
"""
|
||||
Defines the interface for site-specific scrapers.
|
||||
Each concrete scraper (Piaotian, Biquge, etc.) must implement these.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def root(self) -> str: ...
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def encoding(self) -> str: ...
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def chapter_list_selector(self) -> str: ...
|
||||
|
||||
# --------------------------
|
||||
# Metadata extraction
|
||||
# --------------------------
|
||||
@abstractmethod
|
||||
def parse_title(self, soup: BeautifulSoup) -> str: ...
|
||||
|
||||
@abstractmethod
|
||||
def parse_author(self, soup: BeautifulSoup) -> str: ...
|
||||
|
||||
@abstractmethod
|
||||
def parse_description(self, soup: BeautifulSoup) -> str: ...
|
||||
|
||||
@abstractmethod
|
||||
def parse_cover(self, soup: BeautifulSoup, url: str) -> Optional[str]: ...
|
||||
|
||||
# --------------------------
|
||||
# Chapter extraction
|
||||
# --------------------------
|
||||
@abstractmethod
|
||||
def extract_chapter_page_url(self, soup: BeautifulSoup) -> str: ...
|
||||
|
||||
@abstractmethod
|
||||
def parse_chapter_list(self, soup: BeautifulSoup) -> list: ...
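# Minimal sketch of a concrete implementation (the selectors and domain are
# placeholders, not the real Piaotian ones):
#
#   class ExampleSiteScraper(SiteScraper):
#       root = "https://example.com"
#       encoding = "utf-8"
#       chapter_list_selector = "div.chapter-list a"
#
#       def parse_title(self, soup):
#           return soup.select_one("h1").get_text(strip=True)
#
#       def parse_author(self, soup): ...
#       def parse_description(self, soup): ...
#       def parse_cover(self, soup, url): ...
#       def extract_chapter_page_url(self, soup): ...
#
#       def parse_chapter_list(self, soup):
#           return [a["href"] for a in soup.select(self.chapter_list_selector)]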
|
||||
@@ -1,167 +1,106 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/controller_tasks.py
|
||||
# Purpose:
|
||||
# FULL scrape entrypoint + launching download/parse/save pipelines.
|
||||
# NO result.get() anywhere. Scraping is done inline.
|
||||
# Start the download → parse → save pipeline for a scraped book,
|
||||
# including progress/abort tracking via book_id.
|
||||
# ONLY THE CONTROLLER UPDATES PROGRESS (initial total).
|
||||
# ============================================================
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
|
||||
import os
|
||||
import time
|
||||
import redis
|
||||
from scraper.download_controller import DownloadController
|
||||
from scraper.progress import (
|
||||
set_total,
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
import redis
|
||||
import os
|
||||
from scraper.abort import abort_requested
|
||||
|
||||
from scraper.services.scrape_engine import ScrapeEngine
|
||||
from scraper.services.site_resolver import SiteResolver
|
||||
|
||||
from db.repository import fetch_book, set_chapters_total
|
||||
from scraper.download_controller import DownloadController
|
||||
|
||||
|
||||
print(">>> [IMPORT] controller_tasks.py loaded")
|
||||
|
||||
|
||||
# =============================================================
|
||||
# 1) PUBLIC ENTRYPOINT — CALLED FROM /start
|
||||
# =============================================================
|
||||
@celery_app.task(
|
||||
bind=True,
|
||||
queue="controller",
|
||||
ignore_result=False,
|
||||
name="scraper.tasks.controller_tasks.start_full_scrape",
|
||||
)
|
||||
@logcall
|
||||
def start_full_scrape(self, book_idx: str):
|
||||
@celery_app.task(bind=True, queue="controller", ignore_result=False)
|
||||
def launch_downloads(self, book_id: str, scrape_result: dict):
|
||||
"""
|
||||
FULL SCRAPE ENTRYPOINT.
|
||||
Scraping is done inline → no Celery .get() needed.
|
||||
"""
|
||||
|
||||
log(f"[CTRL] start_full_scrape(book_idx={book_idx})")
|
||||
|
||||
# Abort before doing anything
|
||||
if abort_requested(book_idx):
|
||||
log(f"[CTRL] PRE-ABORT flag detected for {book_idx}")
|
||||
return {"book_idx": book_idx, "aborted": True, "reason": "pre-abort"}
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 1) Load book metadata from SQLite
|
||||
# --------------------------------------------------------
|
||||
book = fetch_book(book_idx)
|
||||
if not book:
|
||||
msg = f"[CTRL] Book '{book_idx}' not found in DB"
|
||||
log(msg)
|
||||
raise ValueError(msg)
|
||||
|
||||
url = book.get("book_url")
|
||||
if not url:
|
||||
msg = f"[CTRL] No book_url stored for {book_idx}"
|
||||
log(msg)
|
||||
raise ValueError(msg)
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 2) INLINE SCRAPE (fast, no Celery wait)
|
||||
# --------------------------------------------------------
|
||||
site = SiteResolver.resolve(url)
|
||||
|
||||
try:
|
||||
scrape_result = ScrapeEngine.fetch_metadata_and_chapters(site, url)
|
||||
log(f"[CTRL] Scrape OK for {book_idx}: {scrape_result.get('title')}")
|
||||
except Exception as e:
|
||||
log(f"[CTRL] ERROR during scrape of {book_idx}: {e}")
|
||||
raise
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 3) Continue → dispatch pipelines
|
||||
# --------------------------------------------------------
|
||||
return launch_downloads(book_idx, scrape_result)
|
||||
Launch the entire pipeline (download → parse → save),
|
||||
AND initialize progress counters.
|
||||
|
||||
|
||||
# =============================================================
|
||||
# 2) PIPELINE DISPATCH (NOT a Celery task)
|
||||
# =============================================================
|
||||
@logcall
|
||||
def launch_downloads(book_idx: str, scrape_result: dict):
|
||||
"""
|
||||
Launches the entire processing pipeline:
|
||||
- initialize Redis UI state
|
||||
- initialize SQLite totals
|
||||
- dispatch per-chapter pipelines via DownloadController
|
||||
Chapter-level progress is updated INSIDE the download/parse/save tasks.
|
||||
This task MUST NOT call .get() on async subtasks (Celery restriction).
|
||||
"""
|
||||
|
||||
title = scrape_result.get("title", "UnknownBook")
|
||||
chapters = scrape_result.get("chapters", []) or []
|
||||
total = len(chapters)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INIT REDIS STATE
|
||||
# INIT BOOK STATE MODEL (required for Active Books dashboard)
|
||||
# ------------------------------------------------------------
|
||||
|
||||
broker_url = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
||||
parsed = urlparse(broker_url)
|
||||
|
||||
r = redis.Redis(
|
||||
state = redis.Redis(
|
||||
host=parsed.hostname,
|
||||
port=parsed.port,
|
||||
db=int(parsed.path.strip("/")),
|
||||
decode_responses=True,
|
||||
)
|
||||
|
||||
base = f"book:{book_idx}:state"
|
||||
# Book metadata
|
||||
state.set(f"book:{book_id}:title", title)
|
||||
state.set(f"book:{book_id}:status", "starting")
|
||||
|
||||
r.hset(base, "title", title)
|
||||
r.hset(base, "status", "starting")
|
||||
r.hset(base, "chapters_total", total)
|
||||
r.hset(base, "chapters_download_done", 0)
|
||||
r.hset(base, "chapters_download_skipped", 0)
|
||||
r.hset(base, "chapters_parsed_done", 0)
|
||||
r.hset(base, "audio_done", 0)
|
||||
r.hset(base, "audio_skipped", 0)
|
||||
r.hset(base, "last_update", int(time.time()))
|
||||
# Download counters
|
||||
state.set(f"book:{book_id}:download:total", total)
|
||||
state.set(f"book:{book_id}:download:done", 0)
|
||||
|
||||
# Audio counters (start at zero)
|
||||
state.set(f"book:{book_id}:audio:done", 0)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INIT SQLITE SNAPSHOT
|
||||
# INIT PROGRESS
|
||||
# ------------------------------------------------------------
|
||||
try:
|
||||
set_chapters_total(book_idx, total)
|
||||
except Exception as e:
|
||||
log(f"[CTRL] ERROR updating SQLite totals: {e}")
|
||||
raise
|
||||
|
||||
log(f"[CTRL] Initialized totals for {book_idx}: {total}")
|
||||
set_total(book_id, total)
|
||||
log(f"[CTRL] Progress initialized for {book_id}: total={total}")
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# ABORT CHECK BEFORE LAUNCHING JOBS
|
||||
# BUILD CONTROLLER
|
||||
# ------------------------------------------------------------
|
||||
if abort_requested(book_idx):
|
||||
log(f"[CTRL] ABORT flag detected — stopping BEFORE dispatch for {book_idx}")
|
||||
r.hset(base, "status", "aborted")
|
||||
return {"book_idx": book_idx, "aborted": True, "reason": "abort-before-start"}
|
||||
ctl = DownloadController(book_id, scrape_result)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# BUILD + DISPATCH PER-CHAPTER PIPELINES
|
||||
# START PIPELINES (ASYNC)
|
||||
# Returns a celery group AsyncResult. We DO NOT iterate or get().
|
||||
# Progress & failures are handled by the worker subtasks.
|
||||
# ------------------------------------------------------------
|
||||
controller = DownloadController(book_idx, scrape_result)
|
||||
|
||||
try:
|
||||
group_result = controller.start()
|
||||
gid = getattr(group_result, "id", None)
|
||||
log(f"[CTRL] Pipelines dispatched for {book_idx} (group_id={gid})")
|
||||
except Exception as e:
|
||||
log(f"[CTRL] ERROR dispatching pipelines for {book_idx}: {e}")
|
||||
group_result = ctl.start()
|
||||
|
||||
log(
|
||||
f"[CTRL] Pipelines dispatched for '{title}' "
|
||||
f"(book_id={book_id}, group_id={group_result.id})"
|
||||
)
|
||||
|
||||
# Abort flag set BEFORE tasks start?
|
||||
if abort_requested(book_id):
|
||||
log(f"[CTRL] ABORT requested before tasks start")
|
||||
return {"book_id": book_id, "aborted": True}
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[CTRL] ERROR while dispatching pipelines: {exc}")
|
||||
raise
|
||||
|
||||
# Update UI state to "downloading"
|
||||
r.hset(base, "status", "downloading")
|
||||
r.hset(base, "last_update", int(time.time()))
|
||||
# ------------------------------------------------------------
|
||||
# CONTROLLER DOES NOT WAIT FOR SUBTASK RESULTS
|
||||
# (Download/parse/save tasks update progress themselves)
|
||||
# ------------------------------------------------------------
|
||||
log(f"[CTRL] Controller finished dispatch for book_id={book_id}")
|
||||
|
||||
return {
|
||||
"book_idx": book_idx,
|
||||
"book_id": book_id,
|
||||
"total": total,
|
||||
"started": True,
|
||||
"group_id": gid,
|
||||
"group_id": group_result.id,
|
||||
}
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/m4b_tasks.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from typing import List
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.repository import fetch_book, store_m4b_error
|
||||
from scraper.scriptgen import build_merge_block
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Helper: detect volumes (UNCHANGED)
|
||||
# ------------------------------------------------------------
|
||||
def detect_volumes(book_base: str) -> List[str]:
|
||||
volumes = []
|
||||
for name in os.listdir(book_base):
|
||||
if name.lower().startswith("volume_"):
|
||||
full = os.path.join(book_base, name)
|
||||
if os.path.isdir(full):
|
||||
volumes.append(name)
|
||||
volumes.sort()
|
||||
return volumes
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Celery task
|
||||
# ------------------------------------------------------------
|
||||
@celery_app.task(bind=True, queue="m4b", ignore_result=True)
|
||||
@logcall
|
||||
def run_m4btool(self, book_idx: str):
|
||||
|
||||
log(f"[M4B] START book_idx={book_idx}")
|
||||
|
||||
book = fetch_book(book_idx)
|
||||
if not book:
|
||||
log(f"[M4B] Book not found in SQL: book_idx={book_idx}")
|
||||
return
|
||||
|
||||
title = book.get("title", book_idx)
|
||||
author = book.get("author", "Unknown")
|
||||
|
||||
output_root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
|
||||
book_base = os.path.join(output_root, title)
|
||||
|
||||
log(f"[M4B] Book base directory: {book_base}")
|
||||
|
||||
if not os.path.isdir(book_base):
|
||||
log(f"[M4B] Book directory missing: {book_base}")
|
||||
return
|
||||
|
||||
volumes = detect_volumes(book_base)
|
||||
if not volumes:
|
||||
log(f"[M4B] No volumes found for book_idx={book_idx}")
|
||||
return
|
||||
|
||||
log(f"[M4B] Volumes detected: {volumes}")
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Build canonical commands via scriptgen
|
||||
# --------------------------------------------------------
|
||||
merge_block = build_merge_block(
|
||||
title, author, [(i + 1, v) for i, v in enumerate(volumes)]
|
||||
)
|
||||
commands = [c.strip() for c in merge_block.split("&&") if c.strip()]
|
||||
|
||||
for volume, cmd in zip(volumes, commands):
|
||||
audio_dir = os.path.join(book_base, volume, "Audio")
|
||||
if not os.path.isdir(audio_dir):
|
||||
log(f"[M4B] SKIP {volume}: no Audio directory")
|
||||
continue
|
||||
|
||||
log(f"[M4B] Running for volume={volume}")
|
||||
log(f"[M4B] CMD: {cmd}")
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
cwd=book_base,
|
||||
shell=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
|
||||
if result.stdout:
|
||||
log(f"[M4B][STDOUT] {result.stdout}")
|
||||
|
||||
except subprocess.CalledProcessError as exc:
|
||||
log(f"[M4B][FAILED] volume={volume}")
|
||||
|
||||
if exc.stdout:
|
||||
log(f"[M4B][STDOUT] {exc.stdout}")
|
||||
if exc.stderr:
|
||||
log(f"[M4B][STDERR] {exc.stderr}")
|
||||
|
||||
store_m4b_error(
|
||||
book_idx=book_idx,
|
||||
volume=volume,
|
||||
error_text=exc.stderr or str(exc),
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[M4B][UNEXPECTED ERROR] volume={volume}: {exc}")
|
||||
|
||||
store_m4b_error(
|
||||
book_idx=book_idx,
|
||||
volume=volume,
|
||||
error_text=str(exc),
|
||||
)
|
||||
continue
|
||||
|
||||
log(f"[M4B] FINISHED book_idx={book_idx}")
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Orchestration helper (UNCHANGED)
|
||||
# ------------------------------------------------------------
|
||||
@logcall
|
||||
def queue_m4b_for_book(book_idx: str):
|
||||
log(f"[M4B] Queuing m4b-tool for book_idx={book_idx}")
|
||||
celery_app.send_task(
|
||||
"scraper.tasks.m4b_tasks.run_m4btool",
|
||||
args=[book_idx],
|
||||
queue="m4b",
|
||||
)
|
||||
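The zip(volumes, commands) loop in run_m4btool assumes that build_merge_block returns one shell command per volume, joined with "&&", in the same order as detect_volumes. A sketch of that assumed contract; the stub below stands in for scriptgen and the m4b-tool arguments are illustrative only:

def build_merge_block_stub(title, author, volumes):
    # One merge command per (index, volume_name) pair, '&&'-joined,
    # mirroring the format run_m4btool splits on.
    cmds = [
        f'm4b-tool merge "{vol}/Audio" --output-file "{title} {idx:02d}.m4b" --artist "{author}"'
        for idx, vol in volumes
    ]
    return " && ".join(cmds)

block = build_merge_block_stub("Example Book", "Unknown", [(1, "Volume_001"), (2, "Volume_002")])
commands = [c.strip() for c in block.split("&&") if c.strip()]
# commands[0] pairs with Volume_001 and commands[1] with Volume_002 in the zip above.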
@ -0,0 +1,57 @@
# ============================================================
# File: scraper/tasks/progress_tasks.py
# Purpose: Central progress updater for chapter pipelines.
# Updated for chapter_dict pipeline model.
# ============================================================

from celery_app import celery_app
from scraper.progress import inc_completed, inc_skipped, inc_failed
from logbus.publisher import log

print(">>> [IMPORT] progress_tasks.py loaded")


@celery_app.task(bind=False, name="progress.update", queue="controller")
def update_progress(result: dict, book_id: str):
"""
Central progress logic:
- result: output of save_chapter
- book_id: explicitly passed by pipeline

IMPORTANT:
- save_chapter already updates counters for skipped & normal chapters
- progress.update MUST NOT double-increment
"""

ch = result.get("chapter") or {}
chapter_num = ch.get("num")

skipped = result.get("skipped", False)
failed = result.get("failed", False)

# ------------------------------------------------------------
# FAILED CASE
# ------------------------------------------------------------
if failed:
inc_failed(book_id)
log(f"[PROG] FAILED chapter {chapter_num}")
return result

# ------------------------------------------------------------
# SKIPPED CASE
# ------------------------------------------------------------
if skipped:
# save_chapter already did:
# inc_skipped(book_id)
log(f"[PROG] SKIPPED chapter {chapter_num}")
return result

# ------------------------------------------------------------
# NORMAL COMPLETION
# ------------------------------------------------------------
# save_chapter did NOT increment completed for skipped cases
# but DID inc_completed(book_id) for normal cases.
# update_progress should NOT double increment, so only log here.
log(f"[PROG] DONE chapter {chapter_num}")

return result
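The docstring above notes that book_id is passed explicitly by the pipeline rather than read from the save result. In Celery signature terms that means the chapter pipeline appends progress.update with the book id bound as an extra argument; a sketch under that assumption (the pipeline builder name is made up):

from celery import chain

def build_chapter_pipeline(chapter_payload, book_id):
    # save_chapter's return dict is prepended to update_progress's arguments,
    # and book_id is bound explicitly as the second argument.
    return chain(
        save_chapter.s(chapter_payload),
        update_progress.s(book_id),
    )

# build_chapter_pipeline(payload, "book-123").apply_async()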
@ -1,84 +1,139 @@
# ============================================================
# File: scraper/tasks/save_tasks.py (RESTORED AUDIO LOGIC + book_idx)
# File: scraper/tasks/save_tasks.py
# Purpose: Save parsed chapter text to disk + trigger audio.
# Updated for chapter_dict + book_meta pipeline model.
# ============================================================

print(">>> [IMPORT] save_tasks.py loaded")

from celery import shared_task
import os

from logbus.publisher import log
from scraper.logger_decorators import logcall
from scraper.utils.utils import get_save_path
from scraper.tasks.download_tasks import log_msg
from scraper.utils import get_save_path
from scraper.tasks.download_tasks import log_msg  # unified logger
from scraper.progress import (
inc_completed,
inc_chapter_done,
inc_chapter_download_skipped,
)
from scraper.tasks.audio_tasks import generate_audio

from db.repository import inc_download_done, inc_download_skipped


@shared_task(bind=True, queue="save", ignore_result=False)
@logcall
def save_chapter(self, payload: dict):

if not payload:
log("[SAVE] ERROR: payload is None")
return {"error": True}

# NEW unified ID
book_idx = payload["book_idx"]

chapter = payload["chapter"]
parsed = payload.get("parsed")
path = payload.get("path")
skipped = payload.get("skipped")

num = chapter["num"]
title = chapter.get("title") or f"Chapter {num}"
volume = chapter.get("volume_path")
volume_name = os.path.basename(volume.rstrip("/"))

# ============================================================
# SKIPPED CASE (old behavior restored)
# ============================================================
if skipped or not parsed:
log_msg(book_idx, f"[SAVE] SKIP chapter {num}")
inc_download_skipped(book_idx)

# OLD behavior: even skipped chapters still queue audio
def save_chapter(self, parsed: dict):
"""
New pipeline model:
parsed = {
"book_id": str,
"chapter": chapter_dict,
"text": str,
"length": int,
"book_meta": dict,
"skipped": bool,
"path": optional str (if skipped)
}
"""

book_id = parsed.get("book_id", "NOBOOK")
chapter_dict = parsed.get("chapter") or {}
book_meta = parsed.get("book_meta") or {}

chapter_num = chapter_dict.get("num")
chapter_title = chapter_dict.get("title") or f"Chapter {chapter_num}"
volume_path = chapter_dict.get("volume_path")

# ------------------------------------------------------------
# VALIDATION
# ------------------------------------------------------------
if chapter_num is None or volume_path is None:
raise ValueError("Invalid parsed payload: chapter_dict missing fields.")

# ------------------------------------------------------------
# SKIPPED CASE
# ------------------------------------------------------------
if parsed.get("skipped"):
path = parsed.get("path", None)
log_msg(book_id, f"[SAVE] SKIP chapter {chapter_num} → {path}")

inc_chapter_download_skipped(book_id)

volume_name = os.path.basename(volume_path.rstrip("/"))

# Queue audio only if a valid file exists
if path and os.path.exists(path):
log_msg(book_idx, f"[AUDIO] Queueing audio for SKIPPED chapter {num}")
try:
generate_audio.delay(book_idx, volume_name, num, title, path)
except Exception as exc:
log_msg(book_idx, f"[AUDIO] ERROR queueing skipped audio: {exc}")

return payload

# ============================================================
generate_audio.delay(
book_id,
volume_name,
chapter_num,
chapter_title,
path,
)
log_msg(
book_id,
f"[AUDIO] Task queued (SKIPPED) for chapter {chapter_num} in {volume_name}",
)
except Exception as audio_exc:
log_msg(
book_id,
f"[AUDIO] ERROR queueing (SKIPPED) chapter {chapter_num}: {audio_exc}",
)

return {
"book_id": book_id,
"chapter": chapter_dict,
"path": path,
"skipped": True,
"book_meta": book_meta,
}

# ------------------------------------------------------------
# NORMAL SAVE CASE
# ============================================================
# ------------------------------------------------------------
try:
os.makedirs(volume, exist_ok=True)
save_path = get_save_path(num, volume)
text = parsed.get("text", "")

with open(save_path, "w", encoding="utf-8") as f:
f.write(parsed)
# Ensure volume folder exists
os.makedirs(volume_path, exist_ok=True)

log_msg(book_idx, f"[SAVE] Saved chapter {num} → {save_path}")
# Build final chapter file path
path = get_save_path(chapter_num, volume_path)

inc_download_done(book_idx)
# Write chapter text to file
with open(path, "w", encoding="utf-8") as f:
f.write(text)

# OLD behavior: ALWAYS queue audio
try:
generate_audio.delay(book_idx, volume_name, num, title, save_path)
log_msg(book_idx, f"[AUDIO] Task queued for chapter {num}")
except Exception as exc:
log_msg(book_idx, f"[AUDIO] ERROR queueing chapter {num}: {exc}")
log_msg(book_id, f"[SAVE] Saved chapter {chapter_num} → {path}")
inc_chapter_done(book_id)
inc_completed(book_id)

# Determine volume name
volume_name = os.path.basename(volume_path.rstrip("/"))

payload["path"] = save_path
payload["skipped"] = False
return payload
# Queue audio task
try:
generate_audio.delay(
book_id,
volume_name,
chapter_num,
chapter_title,
path,
)
log_msg(
book_id,
f"[AUDIO] Task queued for chapter {chapter_num} in {volume_name}",
)
except Exception as audio_exc:
log_msg(
book_id, f"[AUDIO] ERROR queueing chapter {chapter_num}: {audio_exc}"
)

return {
"book_id": book_id,
"chapter": chapter_dict,
"path": path,
"book_meta": book_meta,
}

except Exception as exc:
log_msg(book_idx, f"[SAVE] ERROR saving chapter {num}: {exc}")
log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter_num}: {exc}")
raise
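For reference, this is the payload shape the new save_chapter expects from the parse step, written out as a literal. The values are examples only, and the exact file layout produced by get_save_path is an assumption:

parsed_example = {
    "book_id": "book-123",
    "chapter": {
        "num": 7,
        "title": "Chapter 7",
        "volume_path": "output/Example Book/Volume_001",
    },
    "text": "Chapter text goes here...",
    "length": 1234,
    "book_meta": {"title": "Example Book", "author": "Unknown"},
    "skipped": False,
    # "path" appears only in the skipped case, pointing at the already-saved file
}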
@ -1,149 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/statuscheck.py
|
||||
# Purpose:
|
||||
# Final status check after audio completion.
|
||||
#
|
||||
# Responsibilities:
|
||||
# - Verify Redis counters (sanity check)
|
||||
# - Verify filesystem (Audio files present)
|
||||
# - Queue m4btool task
|
||||
#
|
||||
# Design rules:
|
||||
# - Book-scope ONLY
|
||||
# - No direct Redis usage
|
||||
# - Repository is the single source of truth
|
||||
# - Idempotent, defensive, non-blocking
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
from db.repository import (
|
||||
get_audio_done,
|
||||
get_chapters_total,
|
||||
set_status,
|
||||
fetch_book,
|
||||
)
|
||||
|
||||
from scraper.tasks.m4b_tasks import run_m4btool
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Helpers
|
||||
# ------------------------------------------------------------
|
||||
@log
|
||||
def _detect_volumes(book_base: str):
|
||||
"""
|
||||
Return sorted list of Volume_XXX directories.
|
||||
"""
|
||||
vols = []
|
||||
for name in os.listdir(book_base):
|
||||
if name.lower().startswith("volume_"):
|
||||
full = os.path.join(book_base, name)
|
||||
if os.path.isdir(full):
|
||||
vols.append(name)
|
||||
vols.sort()
|
||||
return vols
|
||||
|
||||
|
||||
@logcall
|
||||
def _count_audio_files(audio_dir: str) -> int:
|
||||
"""
|
||||
Count .m4b files in an Audio directory.
|
||||
"""
|
||||
if not os.path.isdir(audio_dir):
|
||||
return 0
|
||||
return len([f for f in os.listdir(audio_dir) if f.lower().endswith(".m4b")])
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Celery task
|
||||
# ------------------------------------------------------------
|
||||
@celery_app.task(bind=True, queue="controller", ignore_result=True)
|
||||
@logcall
|
||||
def run_statuscheck(self, book_idx: str):
|
||||
"""
|
||||
Final statuscheck before m4btool execution.
|
||||
|
||||
Triggered exactly once by audio_completion quickcheck.
|
||||
"""
|
||||
|
||||
log(f"[STATUSCHECK] START book={book_idx}")
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 1. Redis sanity check (via repository)
|
||||
# --------------------------------------------------------
|
||||
audio_done = get_audio_done(book_idx)
|
||||
chapters_total = get_chapters_total(book_idx)
|
||||
|
||||
log(
|
||||
f"[STATUSCHECK] Counters book={book_idx} "
|
||||
f"audio_done={audio_done} chapters_total={chapters_total}"
|
||||
)
|
||||
|
||||
if chapters_total <= 0:
|
||||
log(f"[STATUSCHECK] No chapters_total → abort")
|
||||
return
|
||||
|
||||
if audio_done < chapters_total:
|
||||
# Defensive: should not happen, but never assume
|
||||
log(
|
||||
f"[STATUSCHECK] Audio not complete yet "
|
||||
f"({audio_done}/{chapters_total}) → abort"
|
||||
)
|
||||
return
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 2. Fetch book metadata (for paths & m4b meta)
|
||||
# --------------------------------------------------------
|
||||
book = fetch_book(book_idx)
|
||||
if not book:
|
||||
log(f"[STATUSCHECK] Book not found in DB: {book_idx}")
|
||||
return
|
||||
|
||||
title = book.get("title") or book_idx
|
||||
author = book.get("author") or "Unknown"
|
||||
|
||||
# Base output directory
|
||||
root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
|
||||
book_base = os.path.join(root, title)
|
||||
|
||||
if not os.path.isdir(book_base):
|
||||
log(f"[STATUSCHECK] Book directory missing: {book_base}")
|
||||
return
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 3. Filesystem validation (light, non-blocking)
|
||||
# --------------------------------------------------------
|
||||
volumes = _detect_volumes(book_base)
|
||||
|
||||
if not volumes:
|
||||
log(f"[STATUSCHECK] No volumes found for {book_idx}")
|
||||
# Still allow m4btool to decide (it will no-op)
|
||||
else:
|
||||
for vol in volumes:
|
||||
audio_dir = os.path.join(book_base, vol, "Audio")
|
||||
count = _count_audio_files(audio_dir)
|
||||
|
||||
log(f"[STATUSCHECK] {vol}: " f"{count} audio files detected")
|
||||
|
||||
# --------------------------------------------------------
|
||||
# 4. Queue m4btool (final pipeline step)
|
||||
# --------------------------------------------------------
|
||||
log(f"[STATUSCHECK] Queue m4btool for book={book_idx}")
|
||||
|
||||
set_status(book_idx, "m4b_running")
|
||||
|
||||
run_m4btool.delay(
|
||||
book_idx=book_idx,
|
||||
book_base=book_base,
|
||||
meta={
|
||||
"title": title,
|
||||
"author": author,
|
||||
},
|
||||
)
|
||||
|
||||
log(f"[STATUSCHECK] DONE book={book_idx}")
|
||||
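run_statuscheck above is documented as being triggered exactly once by the audio-completion quickcheck. One way to get that once-only behaviour is an atomic Redis flag; a sketch assuming a redis-py client plus the repository counter helpers used above (the flag key name is made up for the example):

def audio_quickcheck(book_idx, redis_client):
    # Queue the final statuscheck only when all audio is done, and only once.
    done = get_audio_done(book_idx)
    total = get_chapters_total(book_idx)
    if total <= 0 or done < total:
        return False
    # SET ... NX succeeds only for the first caller, so the task is queued once.
    first = redis_client.set(f"statuscheck:queued:{book_idx}", 1, nx=True)
    if first:
        run_statuscheck.delay(book_idx)
    return bool(first)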
@ -1,272 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/utils/state_sync.py
|
||||
# Purpose:
|
||||
# State inspection + optional sync logic for unified book_idx model.
|
||||
# Generates full book-card compatible dicts for debug UI.
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import redis
|
||||
from db.db import get_db
|
||||
|
||||
|
||||
def _build_card(sqlite_row, redis_state, merged):
|
||||
"""
|
||||
Creates a dict that matches the fields required by components/bookcard.html:
|
||||
b.book_idx
|
||||
b.title
|
||||
b.author
|
||||
b.cover_path
|
||||
b.status
|
||||
b.created_at
|
||||
b.download_done
|
||||
b.download_total
|
||||
b.audio_done
|
||||
b.audio_total
|
||||
"""
|
||||
|
||||
return {
|
||||
"book_idx": sqlite_row.get("book_idx"),
|
||||
"title": sqlite_row.get("title") or "Unknown",
|
||||
"author": sqlite_row.get("author"),
|
||||
"cover_path": sqlite_row.get("cover_path"),
|
||||
# Use merged status (Redis > SQLite)
|
||||
"status": merged.get("status") or sqlite_row.get("status") or "unknown",
|
||||
# Meta
|
||||
"created_at": sqlite_row.get("created_at"),
|
||||
# Download counters
|
||||
"download_done": merged.get("downloaded", 0),
|
||||
"download_total": merged.get("chapters_total", 0),
|
||||
# Audio counters
|
||||
"audio_done": merged.get("audio_done", 0),
|
||||
"audio_total": merged.get("chapters_total", 0),
|
||||
}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# INSPECT ONLY — NO WRITES
|
||||
# ============================================================
|
||||
def inspect_books_state_depecrated():
|
||||
"""
|
||||
Reads all books from SQLite and fetches Redis progress.
|
||||
Builds:
|
||||
• entry.sqlite
|
||||
• entry.redis
|
||||
• entry.would_merge_to
|
||||
• entry.card (book-card compatible)
|
||||
"""
|
||||
|
||||
r = redis.Redis.from_url(os.getenv("REDIS_BROKER"), decode_responses=True)
|
||||
db = get_db()
|
||||
cur = db.cursor()
|
||||
|
||||
cur.execute("SELECT * FROM books")
|
||||
rows = cur.fetchall()
|
||||
|
||||
results = []
|
||||
|
||||
for row in rows:
|
||||
sqlite_row = dict(row)
|
||||
book_idx = sqlite_row["book_idx"]
|
||||
|
||||
redis_key = f"book:{book_idx}:state"
|
||||
redis_state = r.hgetall(redis_key) or {}
|
||||
|
||||
# ================================
|
||||
# DRY-RUN MERGE LOGIC
|
||||
# ================================
|
||||
merged = sqlite_row.copy()
|
||||
|
||||
if redis_state:
|
||||
|
||||
merged["downloaded"] = int(
|
||||
redis_state.get("chapters_download_done", merged.get("downloaded", 0))
|
||||
)
|
||||
|
||||
merged["parsed"] = int(
|
||||
redis_state.get("chapters_parsed_done", merged.get("parsed", 0))
|
||||
)
|
||||
|
||||
merged["audio_done"] = int(
|
||||
redis_state.get("audio_done", merged.get("audio_done", 0))
|
||||
)
|
||||
|
||||
merged["chapters_total"] = int(
|
||||
redis_state.get("chapters_total", merged.get("chapters_total", 0))
|
||||
)
|
||||
|
||||
merged["status"] = redis_state.get(
|
||||
"status", merged.get("status", "unknown")
|
||||
)
|
||||
|
||||
# ================================
|
||||
# Build book-card data
|
||||
# ================================
|
||||
card = _build_card(sqlite_row, redis_state, merged)
|
||||
|
||||
# ================================
|
||||
# Append final result entry
|
||||
# ================================
|
||||
results.append(
|
||||
{
|
||||
"book_idx": book_idx,
|
||||
"title": sqlite_row.get("title"),
|
||||
"sqlite": sqlite_row,
|
||||
"redis": redis_state,
|
||||
"would_merge_to": merged,
|
||||
"card": card,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
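The dry-run merge above prefers the live Redis counters and falls back to the SQLite snapshot field by field. The same precedence can be written as a small pure function; this is a sketch using the key names that appear in this file, nothing more:

def merge_counters(sqlite_row: dict, redis_state: dict) -> dict:
    # Redis wins whenever a key is present; otherwise the SQLite snapshot stands.
    merged = dict(sqlite_row)
    int_fields = {
        "downloaded": "chapters_download_done",
        "parsed": "chapters_parsed_done",
        "audio_done": "audio_done",
        "chapters_total": "chapters_total",
    }
    for column, redis_key in int_fields.items():
        if redis_key in redis_state:
            merged[column] = int(redis_state[redis_key])
    if redis_state.get("status"):
        merged["status"] = redis_state["status"]
    return merged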
# ============================================================
|
||||
# INSPECT ONLY — NO WRITES
|
||||
# ============================================================
|
||||
def inspect_books_state():
|
||||
"""
|
||||
Reads canonical book state from repository.
|
||||
Builds:
|
||||
• entry.sqlite
|
||||
• entry.redis
|
||||
• entry.would_merge_to
|
||||
• entry.card (book-card compatible)
|
||||
"""
|
||||
|
||||
from db.repository import get_book_state
|
||||
from db.db import get_db
|
||||
|
||||
db = get_db()
|
||||
cur = db.cursor()
|
||||
|
||||
# Only needed to know *which* books exist
|
||||
cur.execute("SELECT book_idx FROM books")
|
||||
rows = cur.fetchall()
|
||||
|
||||
results = []
|
||||
|
||||
for row in rows:
|
||||
book_idx = row["book_idx"]
|
||||
|
||||
# --------------------------------
|
||||
# Canonical state (the ONLY source of truth)
|
||||
# --------------------------------
|
||||
state = get_book_state(book_idx)
|
||||
|
||||
# SQLite view = only the SQLite columns
|
||||
sqlite_view = {
|
||||
k: v
|
||||
for k, v in state.items()
|
||||
if k
|
||||
in (
|
||||
"book_idx",
|
||||
"title",
|
||||
"author",
|
||||
"description",
|
||||
"cover_path",
|
||||
"book_url",
|
||||
"chapters_total",
|
||||
"status",
|
||||
"downloaded",
|
||||
"parsed",
|
||||
"audio_done",
|
||||
"created_at",
|
||||
"processdate",
|
||||
"last_update",
|
||||
)
|
||||
}
|
||||
|
||||
# Redis view = only the Redis counters/status
|
||||
redis_view = {
|
||||
k: v
|
||||
for k, v in state.items()
|
||||
if k.startswith("chapters_")
|
||||
or k in ("status", "audio_done", "audio_skipped")
|
||||
}
|
||||
|
||||
merged = state  # literally the canonical state
|
||||
|
||||
card = _build_card(sqlite_view, redis_view, merged)
|
||||
|
||||
results.append(
|
||||
{
|
||||
"book_idx": book_idx,
|
||||
"title": state.get("title"),
|
||||
"sqlite": sqlite_view,
|
||||
"redis": redis_view,
|
||||
"would_merge_to": merged,
|
||||
"card": card,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# ============================================================
|
||||
# SYNC REDIS → SQLITE (writes)
|
||||
# ============================================================
|
||||
def sync_books_from_redis():
|
||||
"""
|
||||
Writes Redis progress values back into SQLite.
|
||||
Uses unified book_idx as identifier.
|
||||
"""
|
||||
|
||||
r = redis.Redis.from_url(os.getenv("REDIS_BROKER"), decode_responses=True)
|
||||
db = get_db()
|
||||
cur = db.cursor()
|
||||
|
||||
cur.execute("SELECT * FROM books")
|
||||
rows = cur.fetchall()
|
||||
|
||||
results = []
|
||||
|
||||
for row in rows:
|
||||
before = dict(row)
|
||||
book_idx = before["book_idx"]
|
||||
|
||||
redis_key = f"book:{book_idx}:state"
|
||||
redis_state = r.hgetall(redis_key)
|
||||
|
||||
if not redis_state:
|
||||
results.append(
|
||||
{
|
||||
"book_idx": book_idx,
|
||||
"before": before,
|
||||
"redis": {},
|
||||
"after": before,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
# Extract progress from Redis
|
||||
downloaded = int(redis_state.get("chapters_download_done", 0))
|
||||
parsed = int(redis_state.get("chapters_parsed_done", 0))
|
||||
audio_done = int(redis_state.get("audio_done", 0))
|
||||
total = int(redis_state.get("chapters_total", 0))
|
||||
status = redis_state.get("status", before.get("status"))
|
||||
|
||||
# Update SQLite
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE books
|
||||
SET downloaded = ?, parsed = ?, audio_done = ?, chapters_total = ?, status = ?, last_update = datetime('now')
|
||||
WHERE book_idx = ?
|
||||
""",
|
||||
(downloaded, parsed, audio_done, total, status, book_idx),
|
||||
)
|
||||
db.commit()
|
||||
|
||||
cur.execute("SELECT * FROM books WHERE book_idx = ?", (book_idx,))
|
||||
after = dict(cur.fetchone())
|
||||
|
||||
results.append(
|
||||
{
|
||||
"book_idx": book_idx,
|
||||
"before": before,
|
||||
"redis": redis_state,
|
||||
"after": after,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
(binary image assets added in this branch: 4.1 KiB, 6.4 KiB, 3.5 KiB, 12 KiB)
@ -1,310 +0,0 @@
|
||||
/* =======================================================================
|
||||
File: static/css/bookcard.css
|
||||
Purpose:
|
||||
Styling for registered book cards:
|
||||
- status colors
|
||||
- badges
|
||||
- start/abort/statuscheck
|
||||
- progress bars
|
||||
======================================================================= */
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
GRID WRAPPER
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.registered-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(340px, 1fr));
|
||||
gap: 20px;
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
BOOK CARD BASE
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-card {
|
||||
position: relative;
|
||||
display: grid;
|
||||
grid-template-columns: 90px auto;
|
||||
gap: 15px;
|
||||
|
||||
padding: 15px;
|
||||
background: #fff;
|
||||
border-radius: 10px;
|
||||
border: 1px solid #e5e5e5;
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
|
||||
|
||||
transition: border-color 0.25s ease, box-shadow 0.25s ease;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
STATUS COLORS (BOOK CARD BORDER)
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
/* Downloading / actively busy */
|
||||
.book-card.downloading {
|
||||
border-color: #ff9500;
|
||||
box-shadow: 0 0 6px rgba(255, 149, 0, 0.35);
|
||||
}
|
||||
|
||||
/* Audio phase */
|
||||
.book-card.audio {
|
||||
border-color: #ffca28;
|
||||
box-shadow: 0 0 6px rgba(255, 202, 40, 0.35);
|
||||
}
|
||||
|
||||
/* Completely done */
|
||||
.book-card.done {
|
||||
border: 2px solid #4caf50;
|
||||
box-shadow: 0 0 6px rgba(76, 175, 80, 0.35);
|
||||
}
|
||||
|
||||
/* Aborted */
|
||||
.book-card.aborted {
|
||||
border-color: #ff3b30;
|
||||
box-shadow: 0 0 6px rgba(255, 59, 48, 0.35);
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
COVER
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-cover {
|
||||
width: 90px;
|
||||
}
|
||||
|
||||
.book-img {
|
||||
width: 90px;
|
||||
height: 130px;
|
||||
object-fit: cover;
|
||||
border-radius: 4px;
|
||||
background: #f4f4f4;
|
||||
}
|
||||
|
||||
.placeholder {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
color: #777;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
META
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.book-title {
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.book-author {
|
||||
font-size: 14px;
|
||||
color: #444;
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.book-created {
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
ACTION BUTTONS
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-actions {
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
gap: 10px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.icon-btn {
|
||||
width: 34px;
|
||||
height: 34px;
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
|
||||
font-size: 16px;
|
||||
color: #fff;
|
||||
cursor: pointer;
|
||||
|
||||
transition: background 0.15s ease, transform 0.1s ease;
|
||||
}
|
||||
|
||||
/* Start */
|
||||
.icon-start {
|
||||
background: #2d8a3d;
|
||||
}
|
||||
.icon-start:hover {
|
||||
background: #226c30;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
.icon-start:disabled {
|
||||
background: #9bbb9f;
|
||||
cursor: not-allowed;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
/* Abort */
|
||||
.icon-abort {
|
||||
background: #c62828;
|
||||
}
|
||||
.icon-abort:hover {
|
||||
background: #a31f1f;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
.icon-abort:disabled {
|
||||
background: #d8a0a0;
|
||||
cursor: not-allowed;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
/* Hide */
|
||||
.hide-form {
|
||||
position: absolute;
|
||||
top: 6px;
|
||||
right: 6px;
|
||||
}
|
||||
.icon-hide {
|
||||
background: #777;
|
||||
}
|
||||
.icon-hide:hover {
|
||||
background: #555;
|
||||
}
|
||||
|
||||
/* Statuscheck */
|
||||
.statuscheck-btn {
|
||||
background-color: #444;
|
||||
color: #fff;
|
||||
border: 1px solid #666;
|
||||
margin-left: 4px;
|
||||
padding: 4px 8px;
|
||||
border-radius: 6px;
|
||||
font-size: 12px;
|
||||
cursor: pointer;
|
||||
}
|
||||
.statuscheck-btn:hover {
|
||||
background-color: #333;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
PROGRESS (FULL WIDTH)
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-progress {
|
||||
grid-column: 1 / -1;
|
||||
margin-top: 12px;
|
||||
padding: 10px 12px;
|
||||
background: #f6f6f6;
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.progress-row {
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.progress-label {
|
||||
font-size: 12px;
|
||||
margin-bottom: 4px;
|
||||
color: #444;
|
||||
}
|
||||
|
||||
/* BAR */
|
||||
.progressbar {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
height: 14px;
|
||||
background: #ddd;
|
||||
border-radius: 7px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.progressbar-fill {
|
||||
height: 100%;
|
||||
transition: width 0.4s ease;
|
||||
}
|
||||
|
||||
/* Download */
|
||||
.progressbar-fill.download {
|
||||
background: #2196f3;
|
||||
}
|
||||
|
||||
/* Audio */
|
||||
.progressbar-fill.audio {
|
||||
background: #4caf50;
|
||||
}
|
||||
|
||||
/* TEXT IN BAR */
|
||||
.progressbar-text {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
color: #fff;
|
||||
text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
STATUS BADGE
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.status-badge {
|
||||
display: inline-block;
|
||||
margin-bottom: 6px;
|
||||
padding: 2px 8px;
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
border-radius: 10px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.5px;
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/* DONE */
|
||||
.status-badge.status-done {
|
||||
background-color: #e6f4ea;
|
||||
color: #2e7d32;
|
||||
border: 1px solid #4caf50;
|
||||
}
|
||||
|
||||
/* AUDIO */
|
||||
.status-badge.status-audio {
|
||||
background-color: #fff8e1;
|
||||
color: #8d6e00;
|
||||
border: 1px solid #ffca28;
|
||||
}
|
||||
|
||||
/* DOWNLOADING */
|
||||
.status-badge.status-downloading {
|
||||
background-color: #e3f2fd;
|
||||
color: #1565c0;
|
||||
border: 1px solid #42a5f5;
|
||||
}
|
||||
|
||||
/* Statuscheck */
|
||||
.icon-statuscheck {
|
||||
background: #444;
|
||||
}
|
||||
|
||||
.icon-statuscheck:hover {
|
||||
background: #333;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
@ -1,145 +0,0 @@
|
||||
/* ============================================================
|
||||
File: static/js/bookcard_controller.js
|
||||
Purpose:
|
||||
Single owner for updating book-card DOM from merged state
|
||||
(would_merge_to)
|
||||
============================================================ */
|
||||
|
||||
console.log("[BOOKCARD] controller loaded");
|
||||
|
||||
/* ============================================================
|
||||
ENTRY POINT (called by state_updater.js)
|
||||
============================================================ */
|
||||
|
||||
function updateBookCardsFromState(stateList) {
|
||||
console.log("[BOOKCARD] updateBookCardsFromState called");
|
||||
|
||||
if (!Array.isArray(stateList)) {
|
||||
console.warn("[BOOKCARD] Invalid stateList", stateList);
|
||||
return;
|
||||
}
|
||||
|
||||
const stateById = {};
|
||||
|
||||
stateList.forEach((entry) => {
|
||||
const merged = entry.would_merge_to;
|
||||
if (!merged || merged.book_idx == null) {
|
||||
console.warn("[BOOKCARD] entry without merged/book_idx", entry);
|
||||
return;
|
||||
}
|
||||
stateById[String(merged.book_idx)] = merged;
|
||||
});
|
||||
|
||||
document.querySelectorAll(".book-card").forEach((card) => {
|
||||
const bookIdx = card.dataset.bookIdx;
|
||||
const state = stateById[bookIdx];
|
||||
|
||||
if (!state) {
|
||||
console.debug("[BOOKCARD] No state for book_idx", bookIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("[BOOKCARD] Updating card", bookIdx, state.status);
|
||||
updateSingleBookCard(card, state);
|
||||
});
|
||||
}
|
||||
|
||||
/* ============================================================
|
||||
SINGLE CARD UPDATE
|
||||
============================================================ */
|
||||
|
||||
function updateSingleBookCard(card, state) {
|
||||
console.log("[BOOKCARD] updateSingleBookCard", state.book_idx);
|
||||
|
||||
updateStatus(card, state);
|
||||
updateStatusBadge(card, state);
|
||||
updateButtons(card, state);
|
||||
updateProgress(card, state);
|
||||
}
|
||||
|
||||
/* ============================================================
|
||||
STATUS
|
||||
============================================================ */
|
||||
|
||||
function updateStatus(card, state) {
|
||||
console.log("[BOOKCARD][STATUS]", state.book_idx, "→", state.status);
|
||||
card.className = `book-card ${state.status || ""}`;
|
||||
}
|
||||
function updateStatusBadge(card, state) {
|
||||
const badge = card.querySelector(".status-badge");
|
||||
if (!badge) return;
|
||||
|
||||
const status = (state.status || "").toLowerCase();
|
||||
|
||||
badge.textContent = status.toUpperCase();
|
||||
badge.className = `status-badge status-${status}`;
|
||||
badge.title =
|
||||
{
|
||||
downloading: "Bezig met downloaden",
|
||||
audio: "Downloads compleet, audio wordt gegenereerd",
|
||||
done: "Alle chapters en audio zijn compleet",
|
||||
}[status] || "";
|
||||
}
|
||||
|
||||
/* ============================================================
|
||||
BUTTONS
|
||||
============================================================ */
|
||||
|
||||
function updateButtons(card, state) {
|
||||
const startBtn = card.querySelector(".icon-start");
|
||||
const abortBtn = card.querySelector(".icon-abort");
|
||||
|
||||
const busy = ["starting", "downloading", "parsing", "audio"];
|
||||
|
||||
console.log("[BOOKCARD][BUTTONS]", state.book_idx, "status:", state.status);
|
||||
|
||||
if (startBtn) {
|
||||
// startBtn.disabled = busy.includes(state.status);
|
||||
}
|
||||
|
||||
if (abortBtn) {
|
||||
abortBtn.disabled = !busy.includes(state.status);
|
||||
}
|
||||
}
|
||||
|
||||
/* ============================================================
|
||||
PROGRESS (DOWNLOAD + AUDIO)
|
||||
============================================================ */
|
||||
|
||||
function updateProgress(card, s) {
|
||||
const total = Number(s.chapters_total || 0);
|
||||
|
||||
// const downloadDone =
|
||||
// Number(s.chapters_download_done || 0) +
|
||||
// Number(s.chapters_download_skipped || 0);
|
||||
const downloadDone = Number(s.downloaded || 0);
|
||||
|
||||
const audioDone = Number(s.audio_done || 0) + Number(s.audio_skipped || 0);
|
||||
|
||||
const downloadPct =
|
||||
total > 0 ? Math.min((downloadDone / total) * 100, 100) : 0;
|
||||
|
||||
const audioPct = total > 0 ? Math.min((audioDone / total) * 100, 100) : 0;
|
||||
|
||||
console.log("[BOOKCARD][PROGRESS]", s.book_idx, {
|
||||
total,
|
||||
downloadDone,
|
||||
audioDone,
|
||||
downloadPct,
|
||||
audioPct,
|
||||
});
|
||||
|
||||
/* ---- DOWNLOAD ---- */
|
||||
const dlBar = card.querySelector('[data-field="download_pct"]');
|
||||
const dlText = card.querySelector('[data-field="download_text"]');
|
||||
|
||||
if (dlBar) dlBar.style.width = `${downloadPct}%`;
|
||||
if (dlText) dlText.textContent = `${downloadDone} / ${total}`;
|
||||
|
||||
/* ---- AUDIO ---- */
|
||||
const auBar = card.querySelector('[data-field="audio_pct"]');
|
||||
const auText = card.querySelector('[data-field="audio_text"]');
|
||||
|
||||
if (auBar) auBar.style.width = `${audioPct}%`;
|
||||
if (auText) auText.textContent = `${audioDone} / ${total}`;
|
||||
}
|
||||
@ -1,178 +1,200 @@
|
||||
/* =======================================================================
|
||||
File: static/js/dashboard.js
|
||||
Purpose:
|
||||
- Sidebar selection
|
||||
- Start / Abort actions
|
||||
- UI status updates
|
||||
NOTE:
|
||||
- NO polling
|
||||
- state_updater.js is authoritative
|
||||
Dashboard interactions:
|
||||
- select book
|
||||
- refresh logs
|
||||
- refresh progress
|
||||
NOTE:
|
||||
$ / $$ / autoScroll come from helpers.js
|
||||
======================================================================= */
|
||||
|
||||
console.log("[DASHBOARD] loaded");
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Helpers
|
||||
--------------------------------------------------------- */
|
||||
Simple fetch wrapper
|
||||
--------------------------------------------------------- */
|
||||
async function apiGet(url) {
|
||||
console.log("[DASHBOARD][API] GET", url);
|
||||
try {
|
||||
const r = await fetch(url, { cache: "no-store" });
|
||||
if (!r.ok) {
|
||||
console.warn("[DASHBOARD][API] GET failed", url, r.status);
|
||||
return null;
|
||||
}
|
||||
const r = await fetch(url);
|
||||
if (!r.ok) return null;
|
||||
return await r.json();
|
||||
} catch (e) {
|
||||
console.error("[DASHBOARD][API] GET error", url, e);
|
||||
console.error("API GET failed:", url, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function safeUpdateLogs(data) {
|
||||
if (typeof window.updateLogs === "function") {
|
||||
console.log("[DASHBOARD] updateLogs()");
|
||||
window.updateLogs(data);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
State
|
||||
--------------------------------------------------------- */
|
||||
let ACTIVE_BOOK_IDX = null;
|
||||
Dashboard state
|
||||
--------------------------------------------------------- */
|
||||
let ACTIVE_BOOK = null;
|
||||
let REFRESH_INTERVAL = null;
|
||||
|
||||
console.log(">>> dashboard.js LOADED");
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
DOM READY
|
||||
--------------------------------------------------------- */
|
||||
DOM Ready → setup
|
||||
--------------------------------------------------------- */
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
console.log("[DASHBOARD] DOMContentLoaded");
|
||||
|
||||
bindSidebar();
|
||||
bindBookCardButtons();
|
||||
console.log(">>> dashboard.js DOMContentLoaded");
|
||||
// =====================================================
|
||||
// GLOBAL FALLBACK POLLING — ALWAYS FETCH LOGS
|
||||
// Runs when no books exist or no selection has been made
|
||||
// =====================================================
|
||||
console.log(">>> dashboard.js: enabling global fallback polling");
|
||||
|
||||
setInterval(() => {
|
||||
// if no active book → fetch global logs
|
||||
if (!ACTIVE_BOOK) {
|
||||
refreshBook(null); // triggers /logs
|
||||
}
|
||||
}, 2000);
|
||||
|
||||
const items = $$(".book-list-item");
|
||||
console.log(">>> dashboard.js found book-list items:", items.length);
|
||||
|
||||
// No books → do not start polling
|
||||
// if (!items || items.length === 0) {
|
||||
// console.log(">>> dashboard.js: no books present, polling off.");
|
||||
// return;
|
||||
// }
|
||||
|
||||
// Book selection listener
|
||||
items.forEach((item) => {
|
||||
item.addEventListener("click", () => {
|
||||
console.log(">>> dashboard.js: user clicked book:", item.dataset.bookId);
|
||||
selectBook(item.dataset.bookId);
|
||||
});
|
||||
});
|
||||
|
||||
const first = document.querySelector(".book-list-item");
|
||||
if (first) {
|
||||
console.log("[DASHBOARD] auto-select", first.dataset.bookIdx);
|
||||
selectBook(first.dataset.bookIdx);
|
||||
// Auto-select first book
|
||||
if (!ACTIVE_BOOK && items[0]) {
|
||||
console.log(
|
||||
">>> dashboard.js: auto-select first book:",
|
||||
items[0].dataset.bookId
|
||||
);
|
||||
selectBook(items[0].dataset.bookId);
|
||||
}
|
||||
});
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Sidebar
|
||||
--------------------------------------------------------- */
|
||||
function bindSidebar() {
|
||||
console.log("[DASHBOARD] bindSidebar()");
|
||||
document.querySelectorAll(".book-list-item").forEach((item) => {
|
||||
item.onclick = () => selectBook(item.dataset.bookIdx);
|
||||
Select a book (updates UI + starts polling)
|
||||
--------------------------------------------------------- */
|
||||
function selectBook(bookId) {
|
||||
console.log(">>> selectBook(", bookId, ")");
|
||||
|
||||
ACTIVE_BOOK = bookId;
|
||||
|
||||
// Highlight
|
||||
$$(".book-list-item").forEach((el) => {
|
||||
el.classList.toggle("active", el.dataset.bookId === bookId);
|
||||
});
|
||||
|
||||
// Reset previous polling
|
||||
if (REFRESH_INTERVAL) {
|
||||
console.log(">>> dashboard.js: clearing previous polling interval");
|
||||
clearInterval(REFRESH_INTERVAL);
|
||||
}
|
||||
|
||||
// Start new polling
|
||||
console.log(">>> dashboard.js: starting polling for bookId =", bookId);
|
||||
REFRESH_INTERVAL = setInterval(() => {
|
||||
refreshBook(ACTIVE_BOOK);
|
||||
}, 2000);
|
||||
|
||||
// Immediate refresh
|
||||
refreshBook(ACTIVE_BOOK);
|
||||
}
|
||||
setInterval(refreshActiveBooks, 2000);
|
||||
async function refreshActiveBooks() {
|
||||
const books = await apiGet("/api/books");
|
||||
if (!books) return;
|
||||
|
||||
const container = $("#book-list");
|
||||
if (!container) return;
|
||||
|
||||
function selectBook(bookIdx) {
|
||||
if (!bookIdx || bookIdx === ACTIVE_BOOK_IDX) return;
|
||||
// Rebuild the list
|
||||
container.innerHTML = "";
|
||||
books.forEach((b) => {
|
||||
const div = document.createElement("div");
|
||||
div.className = "book-list-item";
|
||||
div.dataset.bookId = b.book_id;
|
||||
|
||||
ACTIVE_BOOK_IDX = bookIdx;
|
||||
console.log("[DASHBOARD] selectBook", bookIdx);
|
||||
div.innerHTML = `
|
||||
<div class="book-title">${b.title}</div>
|
||||
<div class="book-status">${b.status}</div>
|
||||
<div class="book-progress">
|
||||
${b.download_done}/${b.download_total} downloaded,
|
||||
${b.audio_done}/${b.audio_total} audio
|
||||
</div>
|
||||
|
||||
document.querySelectorAll(".book-list-item").forEach((el) => {
|
||||
el.classList.toggle("active", el.dataset.bookIdx === bookIdx);
|
||||
<button class="abort-btn" onclick="abortBook('${b.book_id}')">Abort</button>
|
||||
`;
|
||||
|
||||
// Re-attach the event listener
|
||||
div.addEventListener("click", () => selectBook(b.book_id));
|
||||
|
||||
container.appendChild(div);
|
||||
});
|
||||
|
||||
refreshBook(bookIdx);
|
||||
// If ACTIVE_BOOK is not yet known → auto-select the first book
|
||||
if (!ACTIVE_BOOK && books.length > 0) {
|
||||
selectBook(books[0].book_id);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Book refresh (NO POLLING)
|
||||
--------------------------------------------------------- */
|
||||
async function refreshBook(bookIdx) {
|
||||
console.log("[DASHBOARD] refreshBook", bookIdx);
|
||||
Fetch logs + progress from API
|
||||
--------------------------------------------------------- */
|
||||
async function refreshBook(bookId) {
|
||||
console.log(">>> refreshBook(", bookId, ")");
|
||||
|
||||
const logs = await apiGet(`/api/book/${bookIdx}/logs`);
|
||||
if (logs) safeUpdateLogs(logs);
|
||||
// 1) If there is NO bookId → only fetch the global logs
|
||||
if (!bookId) {
|
||||
console.log(">>> refreshBook: no active book → fetch /logs");
|
||||
|
||||
refreshBookCards();
|
||||
}
|
||||
const data = await apiGet("/logs");
|
||||
if (data && data.logs) updateLogs(data.logs);
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Bookcard buttons
|
||||
--------------------------------------------------------- */
|
||||
function bindBookCardButtons() {
|
||||
console.log("[DASHBOARD] bindBookCardButtons()");
|
||||
|
||||
document.querySelectorAll(".icon-start").forEach((btn) => {
|
||||
if (btn.dataset.bound) return;
|
||||
btn.dataset.bound = "1";
|
||||
|
||||
btn.onclick = (e) => {
|
||||
e.preventDefault();
|
||||
const card = btn.closest(".book-card");
|
||||
if (!card) return;
|
||||
startBook(card.dataset.bookIdx);
|
||||
};
|
||||
});
|
||||
return; // done
|
||||
}
|
||||
|
||||
document.querySelectorAll(".icon-abort").forEach((btn) => {
|
||||
if (btn.dataset.bound) return;
|
||||
btn.dataset.bound = "1";
|
||||
// 2) If there IS a book → fetch book status + logs
|
||||
const state = await apiGet(`/api/book/${bookId}/status`);
|
||||
const logs = await apiGet(`/api/book/${bookId}/logs`);
|
||||
|
||||
btn.onclick = (e) => {
|
||||
e.preventDefault();
|
||||
const card = btn.closest(".book-card");
|
||||
if (!card) return;
|
||||
abortBook(card.dataset.bookIdx);
|
||||
};
|
||||
});
|
||||
}
|
||||
console.log(">>> refreshBook state =", state);
|
||||
console.log(">>> refreshBook logs =", logs);
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
START
|
||||
--------------------------------------------------------- */
|
||||
function startBook(bookIdx) {
|
||||
console.log("[DASHBOARD] START", bookIdx);
|
||||
|
||||
fetch("/start", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/x-www-form-urlencoded" },
|
||||
body: `book_idx=${bookIdx}`,
|
||||
}).then(() => refreshBook(bookIdx));
|
||||
if (state) updateProgressBars(state);
|
||||
if (logs) updateLogs(logs);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
ABORT
|
||||
--------------------------------------------------------- */
|
||||
function abortBook(bookIdx) {
|
||||
if (!confirm(`Abort book ${bookIdx}?`)) return;
|
||||
Update LOG VIEW panel
|
||||
--------------------------------------------------------- */
|
||||
function updateLogs(logList) {
|
||||
const output = $("#log-output");
|
||||
if (!output) {
|
||||
console.warn(">>> updateLogs: no #log-output element found");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("[DASHBOARD] ABORT", bookIdx);
|
||||
output.innerHTML = "";
|
||||
|
||||
fetch(`/abort/${bookIdx}`, { method: "POST" }).then(() =>
|
||||
refreshBook(bookIdx)
|
||||
);
|
||||
}
|
||||
logList.forEach((line) => logAppend(line));
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
Bookcard UI refresh (non-progress)
|
||||
--------------------------------------------------------- */
|
||||
async function refreshBookCards() {
|
||||
console.log("[DASHBOARD] refreshBookCards()");
|
||||
const books = await apiGet("/api/books");
|
||||
if (!books) return;
|
||||
autoScroll(output);
|
||||
}
|
||||
|
||||
document.querySelectorAll(".book-card").forEach((card) => {
|
||||
const idx = card.dataset.bookIdx;
|
||||
const info = books.find((b) => b.book_idx === idx);
|
||||
if (!info) return;
|
||||
|
||||
console.log("[DASHBOARD] card status", idx, info.status);
|
||||
card.className = `book-card ${info.status}`;
|
||||
|
||||
const abortBtn = card.querySelector(".icon-abort");
|
||||
if (abortBtn) {
|
||||
abortBtn.disabled = ![
|
||||
"processing",
|
||||
"downloading",
|
||||
"parsing",
|
||||
"audio",
|
||||
].includes(info.status);
|
||||
}
|
||||
});
|
||||
function abortBook(book_id) {
|
||||
if (!confirm(`Abort tasks for book ${book_id}?`)) return;
|
||||
|
||||
fetch(`/abort/${book_id}`, { method: "POST" })
|
||||
.then((r) => r.json())
|
||||
.then((data) => {
|
||||
console.log("Abort:", data);
|
||||
})
|
||||
.catch((err) => {
|
||||
console.error("Abort failed:", err);
|
||||
});
|
||||
}
|
||||
|
||||
@ -1,101 +0,0 @@
|
||||
/* ============================================================
|
||||
File: static/js/inspect_state.js
|
||||
Purpose:
|
||||
- Receive merged state via state_updater.js
|
||||
- Update ONLY the right-side state tables
|
||||
- NO polling, NO fetch
|
||||
============================================================ */
|
||||
|
||||
console.log("[inspect_state] JS loaded (subscriber mode)");
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
State subscription
|
||||
------------------------------------------------------------ */
|
||||
|
||||
window.addEventListener("state:update", (e) => {
|
||||
const entries = e.detail;
|
||||
|
||||
if (!Array.isArray(entries)) {
|
||||
console.warn("[inspect_state] state:update payload is not array", entries);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("[inspect_state] state:update received entries:", entries.length);
|
||||
updateInspectTables(entries);
|
||||
});
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Update tables
|
||||
------------------------------------------------------------ */
|
||||
|
||||
function updateInspectTables(entries) {
|
||||
console.log("[inspect_state] updating tables");
|
||||
|
||||
entries.forEach((entry) => {
|
||||
const bookIdx = entry.book_idx;
|
||||
if (bookIdx == null) {
|
||||
console.warn("[inspect_state] entry without book_idx", entry);
|
||||
return;
|
||||
}
|
||||
|
||||
const block = document.querySelector(
|
||||
`.state-block[data-book-idx="${bookIdx}"]`
|
||||
);
|
||||
if (!block) {
|
||||
console.warn("[inspect_state] no state-block for book_idx", bookIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
const table = block.querySelector(".state-table");
|
||||
if (!table) {
|
||||
console.warn("[inspect_state] no state-table for book_idx", bookIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("[inspect_state] updating table for book_idx", bookIdx);
|
||||
|
||||
const sql = entry.sqlite || {};
|
||||
const redis = entry.redis || {};
|
||||
const merged = entry.would_merge_to || {};
|
||||
|
||||
table.innerHTML = `
|
||||
<tr>
|
||||
<th>Field</th>
|
||||
<th>SQLite</th>
|
||||
<th>Redis</th>
|
||||
<th>Merged</th>
|
||||
</tr>
|
||||
${row("status", sql, redis, merged)}
|
||||
${row("chapters_total", sql, redis, merged)}
|
||||
${row("downloaded", sql, redis, merged)}
|
||||
${row("chapters_download_done", sql, redis, merged)}
|
||||
${row("chapters_download_skipped", sql, redis, merged)}
|
||||
${row("parsed", sql, redis, merged)}
|
||||
${row("chapters_parsed_done", sql, redis, merged)}
|
||||
${row("audio_done", sql, redis, merged)}
|
||||
${row("audio_skipped", sql, redis, merged)}
|
||||
${row("last_update", sql, redis, merged)}
|
||||
`;
|
||||
});
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Row helper
|
||||
------------------------------------------------------------ */
|
||||
|
||||
function row(field, sql, redis, merged) {
|
||||
const s = sql[field] ?? "";
|
||||
const r = redis[field] ?? "";
|
||||
const m = merged[field] ?? "";
|
||||
|
||||
const cls = String(s) === String(r) ? "same" : "diff";
|
||||
|
||||
return `
|
||||
<tr>
|
||||
<th>${field}</th>
|
||||
<td class="${cls}">${s}</td>
|
||||
<td class="${cls}">${r}</td>
|
||||
<td>${m}</td>
|
||||
</tr>
|
||||
`;
|
||||
}
|
||||
@ -0,0 +1,72 @@
|
||||
/* =======================================================================
|
||||
File: static/js/progress.js
|
||||
Purpose:
|
||||
Update progress bars dynamically for the current book.
|
||||
Expects data from API endpoints via dashboard.js or start.js.
|
||||
======================================================================= */
|
||||
|
||||
console.log(">>> progress.js LOADED");
|
||||
|
||||
function updateProgressBars(data) {
|
||||
console.log(">>> progress.js updateProgressBars() CALLED with:", data);
|
||||
|
||||
if (!data) {
|
||||
console.warn(">>> progress.js: NO DATA RECEIVED");
|
||||
return;
|
||||
}
|
||||
|
||||
// Data format expected:
|
||||
// {
|
||||
// download_done,
|
||||
// download_total,
|
||||
// audio_done,
|
||||
// audio_total
|
||||
// }
|
||||
|
||||
const barDL = $(".progress-bar-fill");
|
||||
const barAU = $(".progress-bar-fill.audio-fill");
|
||||
|
||||
console.log(">>> progress.js barDL =", barDL);
|
||||
console.log(">>> progress.js barAU =", barAU);
|
||||
|
||||
const pctDL =
|
||||
data.download_total > 0
|
||||
? (100 * data.download_done) / data.download_total
|
||||
: 0;
|
||||
|
||||
const pctAU =
|
||||
data.audio_total > 0 ? (100 * data.audio_done) / data.audio_total : 0;
|
||||
|
||||
if (barDL) {
|
||||
barDL.style.width = pctDL.toFixed(1) + "%";
|
||||
console.log(">>> progress.js updated DL bar to", pctDL.toFixed(1) + "%");
|
||||
} else {
|
||||
console.warn(">>> progress.js: barDL NOT FOUND");
|
||||
}
|
||||
|
||||
if (barAU) {
|
||||
barAU.style.width = pctAU.toFixed(1) + "%";
|
||||
console.log(">>> progress.js updated AU bar to", pctAU.toFixed(1) + "%");
|
||||
} else {
|
||||
console.warn(">>> progress.js: barAU NOT FOUND");
|
||||
}
|
||||
|
||||
// Update textual stats
|
||||
const stats = $$(".progress-stats span");
|
||||
console.log(">>> progress.js stats elements found:", stats.length);
|
||||
|
||||
// Expected structure: [DL "x/y", DL "pct", AU "x/y", AU "pct"]
|
||||
if (stats.length >= 4) {
|
||||
stats[0].innerText = `${data.download_done} / ${data.download_total}`;
|
||||
stats[1].innerText = pctDL.toFixed(1) + "%";
|
||||
stats[2].innerText = `${data.audio_done} / ${data.audio_total}`;
|
||||
stats[3].innerText = pctAU.toFixed(1) + "%";
|
||||
|
||||
console.log(">>> progress.js stats updated");
|
||||
} else {
|
||||
console.warn(
|
||||
">>> progress.js: not enough stats spans, found",
|
||||
stats.length
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -1,98 +0,0 @@
/* ========================================================
   File: static/js/state_updater.js
   Purpose:
     - Poll /api/state/all
     - Dispatch merged state to subscribers
       (bookcard_controller, inspect_state, others)
     - Pause polling when tab inactive
   ======================================================== */

console.log("[STATE-UPDATER] loaded");

const STATE_POLL_INTERVAL_MS = 2500;
const STATE_ENDPOINT = "/api/state/all";

let STATE_TIMER = null;

/* ========================================================
   INIT
   ======================================================== */

document.addEventListener("DOMContentLoaded", () => {
  initStateUpdater();
});

function initStateUpdater() {
  const cards = document.querySelectorAll(".book-card");

  if (cards.length === 0) {
    console.log("[STATE-UPDATER] No bookcards found — skipping");
    return;
  }

  console.log(`[STATE-UPDATER] Starting updater for ${cards.length} bookcards`);

  startPolling(true);

  document.addEventListener("visibilitychange", () => {
    document.hidden ? stopPolling() : startPolling(true);
  });
}

/* ========================================================
   DISPATCH
   ======================================================== */

function dispatchState(entries) {
  console.debug("[STATE] dispatch", entries.length);

  // 1. Bookcards
  if (typeof window.updateBookCardsFromState === "function") {
    window.updateBookCardsFromState(entries);
  }

  // 2. Inspect state tables / other subscribers
  window.dispatchEvent(new CustomEvent("state:update", { detail: entries }));
}

/* ========================================================
   POLLING CONTROL
   ======================================================== */

function startPolling(immediate = false) {
  if (STATE_TIMER) return;

  console.log("[STATE-UPDATER] Start polling");

  if (immediate) pollState();

  STATE_TIMER = setInterval(pollState, STATE_POLL_INTERVAL_MS);
}

function stopPolling() {
  if (!STATE_TIMER) return;

  console.log("[STATE-UPDATER] Stop polling (tab inactive)");
  clearInterval(STATE_TIMER);
  STATE_TIMER = null;
}

/* ========================================================
   POLL API
   ======================================================== */

async function pollState() {
  if (document.hidden) return;

  try {
    const resp = await fetch(STATE_ENDPOINT, { cache: "no-store" });
    if (!resp.ok) return;

    const entries = await resp.json();
    if (!Array.isArray(entries)) return;

    dispatchState(entries);
  } catch (e) {
    console.error("[STATE-UPDATER] poll error", e);
  }
}

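Note: state_updater.js only assumes that the endpoint behind STATE_ENDPOINT returns a JSON array; the Flask side of that contract is not shown in this diff. A minimal sketch of such a route, assuming a hypothetical repository helper get_all_book_states() that returns one merged dict per book:

# Sketch only - the endpoint shape is inferred from state_updater.js;
# get_all_book_states() is a hypothetical repository helper, not part of this diff.
from flask import Flask, jsonify

app = Flask(__name__)

@app.route("/api/state/all")
def api_state_all():
    # One merged dict per book (book_idx, status, counters, ...).
    # pollState() only checks that the response parses to a JSON array.
    entries = get_all_book_states()
    return jsonify(entries)
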
@ -1,90 +0,0 @@
{# ============================================================
   File: templates/components/bookcard.html
   Purpose: A single book card (dumb component)
   ============================================================ #}

<div class="book-card {{ b.status }}" data-book-idx="{{ b.book_idx }}">
  <!-- HIDE -->
  <form
    action="/hide/{{ b.book_idx }}"
    method="POST"
    class="hide-form"
    onsubmit="return confirm('Dit boek verbergen?')"
  >
    <button class="icon-btn icon-hide" title="Verbergen">
      <i class="fa-solid fa-xmark"></i>
    </button>
  </form>

  <!-- COVER -->
  <div class="book-cover">
    {% if b.cover_path %}
    <img
      src="/{{ b.cover_path }}"
      class="book-img"
      data-field="cover"
      alt="cover"
    />
    {% else %}
    <div class="book-img placeholder" data-field="cover">?</div>
    {% endif %}
  </div>

  <!-- META -->
  <div class="book-meta">
    <!-- STATUS BADGE -->
    {% if b.status %}
    <span
      class="status-badge status-{{ b.status }}"
      title="
        {% if b.status == 'done' %}Alle chapters en audio zijn compleet{% endif %}
        {% if b.status == 'audio' %}Downloads compleet, audio wordt nog gegenereerd{% endif %}
        {% if b.status == 'downloading' %}Bezig met downloaden{% endif %}
      "
    >
      {{ b.status | upper }}
    </span>
    {% endif %}

    <div class="book-title" data-field="title">{{ b.title }}</div>
    <div class="book-author" data-field="author">{{ b.author }}</div>
    <div class="book-created">
      Geregistreerd: <span data-field="created_at">{{ b.created_at }}</span>
    </div>

    <!-- ACTIONS -->
    <div class="book-actions">
      <!-- START -->
      <form action="/start" method="POST">
        <input type="hidden" name="book_idx" value="{{ b.book_idx }}" />
        <button class="icon-btn icon-start" title="Start" data-action="start">
          <i class="fa-solid fa-play"></i>
        </button>
      </form>

      <!-- ABORT -->
      <form action="/abort/{{ b.book_idx }}" method="POST">
        <input type="hidden" name="book_idx" value="{{ b.book_idx }}" />
        <button class="icon-btn icon-abort" title="Abort" data-action="abort">
          <i class="fa-solid fa-stop"></i>
        </button>
      </form>

      <form
        method="post"
        action="/inspect/statuscheck/{{ b.book_idx }}"
        style="display: inline-block"
      >
        <button
          type="submit"
          class="icon-btn icon-statuscheck"
          title="Herbereken status op basis van bestanden"
        >
          <i class="fa-solid fa-magnifying-glass-chart"></i>
        </button>
      </form>
    </div>
  </div>

  <!-- PROGRESS -->
  <div class="book-progress">{% include "components/progress_box.html" %}</div>
</div>

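Note: bookcard.html is a dumb component; everything it renders comes from the b object supplied by the including template. A hypothetical example of the minimum shape that context needs (field names come from the template above, values are illustrative):

# Illustrative only - the values below are made up, not taken from the repo.
example_card = {
    "book_idx": "12345",
    "status": "downloading",            # drives the card CSS class and status badge
    "cover_path": "covers/12345.jpg",   # optional; the placeholder is shown when missing
    "title": "Example Book",
    "author": "Example Author",
    "created_at": "2024-01-01 12:00:00",
}
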
@ -1,34 +1,61 @@
<!-- =======================================================================
File: templates/components/progress_box.html
Purpose:
Dumb progress UI for a book card.
Initial values via Jinja, live updates via state_updater.js
======================================================================= -->
Purpose: Reusable progress overview (download + audio) for any book.
Notes:
- Expects the following variables from Flask:
  book_id: str
  title: str
  download_total: int
  download_done: int
  audio_total: int
  audio_done: int
- Pure HTML; JS for live updates will be added later.
======================================================================= -->

<div class="progress-box">
<!-- DOWNLOAD -->
<div class="progress-row">
<div class="progress-label">Download</div>
<div class="progressbar">
<div
class="progressbar-fill download"
data-field="download_pct"
style="width: 0%"
></div>
<div class="progressbar-text" data-field="download_text">0 / 0</div>
<!-- Header -->
<div class="progress-header">
<h2>Progress</h2>
{% if title %}
<div class="progress-subtitle">{{ title }}</div>
{% endif %} {% if book_id %}
<div class="progress-bookid">Book ID: <span>{{ book_id }}</span></div>
{% endif %}
</div>

<!-- DOWNLOAD SECTION -->
<div class="progress-section">
<h3>Download Progress</h3>

<div class="progress-bar">
{% set pct = 0 %} {% if download_total > 0 %}
{% set pct = (100 * download_done / download_total) | round(1) %} {% endif %}
<div class="progress-bar-fill" style="width: {{ pct }}%;"></div>
</div>

<div class="progress-stats">
<span>{{ download_done }} / {{ download_total }}</span>
<span>{{ pct }}%</span>
</div>
</div>

<!-- AUDIO -->
<div class="progress-row">
<div class="progress-label">Audio</div>
<div class="progressbar">
<!-- AUDIO SECTION -->
<div class="progress-section">
<h3>Audio Progress</h3>

<div class="progress-bar audio">
{% set pct2 = 0 %} {% if audio_total > 0 %}
{% set pct2 = (100 * audio_done / audio_total) | round(1) %} {% endif %}
<div
class="progressbar-fill audio"
data-field="audio_pct"
style="width: 0%"
class="progress-bar-fill audio-fill"
style="width: {{ pct2 }}%;"
></div>
<div class="progressbar-text" data-field="audio_text">0 / 0</div>
</div>

<div class="progress-stats">
<span>{{ audio_done }} / {{ audio_total }}</span>
<span>{{ pct2 }}%</span>
</div>
</div>
<script src="/static/js/progress.js"></script>
</div>

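Note: the header notes above list the variables progress_box.html expects from Flask. A minimal sketch of a route that supplies them; the route path, page template name, and fetch_book_progress() helper are assumptions, only the keyword arguments mirror the documented contract:

# Sketch only - fetch_book_progress() and book_progress.html are hypothetical.
from flask import Flask, render_template

app = Flask(__name__)

@app.route("/book/<book_id>/progress")
def book_progress(book_id):
    p = fetch_book_progress(book_id)   # hypothetical lookup returning a dict
    return render_template(
        "book_progress.html",          # hypothetical page that includes progress_box.html
        book_id=book_id,
        title=p["title"],
        download_total=p["download_total"],
        download_done=p["download_done"],
        audio_total=p["audio_total"],
        audio_done=p["audio_done"],
    )
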
@ -1,21 +0,0 @@
{# ============================================================
   File: templates/components/registered_books.html
   Purpose: Show a grid of registered books. Each card is
   rendered via bookcard.html.
   ============================================================ #}

<section class="dashboard-section">
  <h2>Geregistreerde boeken</h2>

  {% if registered and registered|length > 0 %}

  <div class="registered-grid">
    {% for b in registered %} {% include "components/bookcard.html" %} {% endfor %}
  </div>

  {% else %}

  <p>Geen geregistreerde boeken.</p>

  {% endif %}
</section>

@ -1,95 +0,0 @@
{# ============================================================
   File: templates/debug/inspect_state.html
   Purpose: Inspect SQLite vs Redis state per book_idx
   - Initial render via Jinja
   - Live updates via inspect_state.js
   - BookCard is server-rendered and NEVER replaced
   - Only the right-side state table is updated dynamically
   ============================================================ #}
{% extends "layout.html" %} {% block content %}

<h1>State Inspection (SQL vs Redis)</h1>

<style>
  .state-block {
    display: grid;
    grid-template-columns: 380px 1fr;
    gap: 20px;
    margin-bottom: 35px;
    padding: 18px;
    border: 1px solid #444;
    background: #222;
    border-radius: 8px;
  }

  .state-table {
    width: 100%;
    border-collapse: collapse;
  }

  .state-table th,
  .state-table td {
    border: 1px solid #555;
    padding: 6px 10px;
  }

  .state-table th {
    background: #333;
    color: #fff;
  }

  .state-table td {
    background: #2a2a2a;
    color: #ddd;
  }

  .same {
    color: #9f9 !important;
  }

  .diff {
    color: #ff7b7b !important;
    font-weight: bold;
  }
</style>

<div id="state-container">
  {% for entry in results %}
  <div class="state-block" data-book-idx="{{ entry.book_idx }}">
    <!-- LEFT: BookCard (server-rendered, NEVER replaced) -->
    <div>
      {% if entry.card %} {% with b = entry.card %}
      {% include "components/bookcard.html" %} {% endwith %} {% else %}
      <strong>{{ entry.book_idx }}</strong>
      {% endif %}
    </div>

    <!-- RIGHT: State table (updated by JS) -->
    <div>
      <table class="state-table">
        <tr>
          <th>Field</th>
          <th>SQLite</th>
          <th>Redis</th>
          <th>Merged</th>
        </tr>

        {% set sql = entry.sqlite %} {% set redis = entry.redis %}
        {% set merged = entry.would_merge_to %}
        {% for field in [ "status", "chapters_total", "downloaded",
        "chapters_download_done", "chapters_download_skipped", "parsed",
        "chapters_parsed_done", "audio_done", "audio_skipped", "last_update" ] %}
        <tr>
          <th>{{ field }}</th>
          <td>{{ sql.get(field, "") }}</td>
          <td>{{ redis.get(field, "") }}</td>
          <td>{{ merged.get(field, "") }}</td>
        </tr>
        {% endfor %}
      </table>
    </div>
  </div>
  {% endfor %}
</div>

{% endblock %} {% block scripts %}
<script src="/static/js/inspect_state.js"></script>
{% endblock %}

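Note: each entry in results only needs book_idx, sqlite, redis, would_merge_to and, optionally, card. A minimal sketch of how the Flask view could assemble that list; the reader and merge helpers named below are assumptions, not part of this diff:

# Sketch only - list_book_ids(), sql_fetch_book(), redis_fetch_counters() and
# merge_state() are hypothetical; the entry shape mirrors what the template iterates.
from flask import Flask, render_template

app = Flask(__name__)

@app.route("/inspect/state")
def inspect_state():
    results = []
    for book_idx in list_book_ids():
        sql_row = sql_fetch_book(book_idx)           # SQLite snapshot
        redis_row = redis_fetch_counters(book_idx)   # live Redis counters
        results.append({
            "book_idx": book_idx,
            "sqlite": sql_row,
            "redis": redis_row,
            "would_merge_to": merge_state(sql_row, redis_row),
            "card": sql_row,   # bookcard.html can render from the snapshot
        })
    return render_template("debug/inspect_state.html", results=results)
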
@ -1,91 +0,0 @@
{% extends "layout.html" %} {% block content %}
<h1>Celery Queue Debug</h1>

<style>
  .debug-section {
    margin-bottom: 40px;
  }
  .debug-table {
    width: 100%;
    border-collapse: collapse;
    margin-bottom: 20px;
  }
  .debug-table th,
  .debug-table td {
    border: 1px solid #444;
    padding: 6px 10px;
  }
  .debug-table th {
    background: #333;
    color: #fff;
  }
  pre {
    background: #1e1e1e;
    color: #ddd;
    padding: 10px;
    overflow-x: auto;
  }
  code {
    color: #9cf;
  }
</style>

<div class="debug-section">
  <h2>Workers</h2>

  <h3>Active Tasks</h3>
  <pre>{{ workers_active | tojson(indent=2) }}</pre>

  <h3>Reserved</h3>
  <pre>{{ workers_reserved | tojson(indent=2) }}</pre>

  <h3>Scheduled</h3>
  <pre>{{ workers_scheduled | tojson(indent=2) }}</pre>
</div>

<hr />

<div class="debug-section">
  <h2>Queues</h2>

  {% for q in queues %}
  <div class="debug-queue">
    <h3>{{ q.name }} ({{ q.length }} items)</h3>

    <table class="debug-table">
      <tr>
        <th>Redis Key</th>
        <td>{{ q.redis_key }}</td>
      </tr>

      <tr>
        <th>Length</th>
        <td>{{ q.length }}</td>
      </tr>

      <tr>
        <th>Items (first 30)</th>
        <td>
          {% if q["items"] %}
          <ul style="margin: 0; padding-left: 20px">
            {% for item in q["items"] %}
            <li><code>{{ item | e }}</code></li>
            {% endfor %}
          </ul>
          {% else %}
          <i>No items</i>
          {% endif %}
        </td>
      </tr>
    </table>
  </div>
  {% endfor %}
</div>

<script>
  setInterval(() => {
    window.location.reload();
  }, 5000);
</script>

{% endblock %}

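Note: the queue half of this page iterates over a queues list carrying name, redis_key, length, and items. With Celery's default Redis broker each queue is a plain Redis list keyed by the queue name, so the backing route could look roughly like the sketch below; the route path, template name, and queue names are assumptions, and the worker sections (which would come from Celery's inspect API) are stubbed:

# Sketch only - route, template, and queue names are hypothetical.
import redis
from flask import Flask, render_template

app = Flask(__name__)
r = redis.Redis.from_url("redis://redis:6379/0", decode_responses=True)

@app.route("/debug/queues")
def debug_queues():
    queues = []
    for name in ("celery", "downloads", "audio"):   # hypothetical queue names
        queues.append({
            "name": name,
            "redis_key": name,                      # default broker: list key == queue name
            "length": r.llen(name),
            "items": r.lrange(name, 0, 29),         # first 30 raw payloads
        })
    return render_template(
        "debug/queues.html",                        # hypothetical template path
        queues=queues,
        workers_active={}, workers_reserved={}, workers_scheduled={},  # stubs
    )
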
@ -1,53 +1,34 @@
<!DOCTYPE html>
<html lang="nl">
<head>
<meta charset="UTF-8" />
<head>
<meta charset="UTF-8">
<title>BookScraper</title>
<style>
body {
  font-family: Arial, sans-serif;
  padding: 40px;
  max-width: 600px;
  margin: auto;
}
h1 {
  margin-bottom: 20px;
}
input[type="text"] {
  width: 100%;
  padding: 12px;
  font-size: 16px;
  border: 1px solid #ccc;
  border-radius: 6px;
}
button {
  margin-top: 20px;
  padding: 12px 20px;
  background: #007bff;
  color: white;
  border: none;
  border-radius: 6px;
  font-size: 16px;
  cursor: pointer;
}
button:hover {
  background: #0056b3;
}
body { font-family: Arial, sans-serif; padding: 40px; max-width: 600px; margin: auto; }
h1 { margin-bottom: 20px; }
input[type="text"] {
  width: 100%; padding: 12px; font-size: 16px;
  border: 1px solid #ccc; border-radius: 6px;
}
button {
  margin-top: 20px;
  padding: 12px 20px;
  background: #007bff; color: white;
  border: none; border-radius: 6px;
  font-size: 16px; cursor: pointer;
}
button:hover { background: #0056b3; }
</style>
</head>
<body>
<h1>BookScraper WebGUI</h1>
</head>
<body>

<form action="/init" method="POST">
<label for="url">Geef een boek-URL op:</label><br /><br />
<input
  type="text"
  id="url"
  name="url"
  placeholder="https://example.com/book/12345"
  required
/>
<button type="submit">Start Scraping</button>
</form>
</body>
<h1>BookScraper WebGUI</h1>

<form action="/start" method="POST">
<label for="url">Geef een boek-URL op:</label><br><br>
<input type="text" id="url" name="url" placeholder="https://example.com/book/12345" required>
<button type="submit">Start Scraping</button>
</form>

</body>
</html>

@ -1,13 +0,0 @@
#!/bin/sh
# mp4info shim for m4b-tool (ffprobe-based)

if [ -z "$1" ]; then
  echo "Usage: mp4info <file>" >&2
  exit 1
fi

# ffprobe outputs float seconds; m4b-tool expects an integer
ffprobe -v error \
  -show_entries format=duration \
  -of default=noprint_wrappers=1:nokey=1 \
  "$1" | awk '{ printf "%d\n", ($1 + 0.5) }'