From feb8ca60d7687156d7859b6c31e0fade912952fe Mon Sep 17 00:00:00 2001
From: "peter.fong"
Date: Mon, 8 Dec 2025 22:24:17 +0100
Subject: [PATCH] extra book metadata

---
 bookscraper/README.md                         |  10 +
 bookscraper/app.py                            | 220 ++++++++++--
 bookscraper/audio_worker_local.py             |   1 +
 bookscraper/celery_app.py                     |  11 +
 bookscraper/db/db.py                          |  37 +-
 bookscraper/db/repository.py                  | 340 ++++++++++++------
 bookscraper/db/state_redis.py                 |  79 ++++
 bookscraper/db/state_sql.py                   | 165 +++++++++
 bookscraper/logbus/publisher.py               |  22 ++
 .../scraper/{progress.py => _dep_progress.py} |   0
 bookscraper/scraper/abort.py                  |   2 +
 bookscraper/scraper/book_scraper.py           |  59 ++-
 bookscraper/scraper/download_controller.py    |   8 -
 bookscraper/scraper/logger_decorators.py      |  33 ++
 bookscraper/scraper/scriptgen.py              |   2 +
 bookscraper/scraper/services/cover_service.py |   3 +-
 bookscraper/scraper/services/init_service.py  |  25 +-
 bookscraper/scraper/services/scrape_engine.py | 284 ++++++++++++++-
 bookscraper/scraper/sites/base.py             |   3 +-
 bookscraper/scraper/sites/piaotian.py         |   3 +-
 bookscraper/scraper/tasks/audio_tasks.py      | 142 +++++---
 bookscraper/scraper/tasks/controller_tasks.py |  11 +-
 bookscraper/scraper/tasks/download_tasks.py   | 156 ++++----
 bookscraper/scraper/tasks/parse_tasks.py      | 189 +++++-----
 bookscraper/scraper/tasks/pipeline.py         |  44 ++-
 bookscraper/scraper/tasks/progress_tasks.py   |  57 ---
 bookscraper/scraper/tasks/save_tasks.py       | 176 ++++-----
 bookscraper/scraper/tasks/scraping.py         |   1 +
 .../utils/__init__.py}                        |   0
 bookscraper/scraper/utils/state_sync.py       | 141 ++++++++
 bookscraper/scraper/{ => utils}/utils.py      |   0
 bookscraper/static/covers/一剑朝天.jpg        | Bin 4238 -> 0 bytes
 bookscraper/static/covers/从吞噬开始.jpg      | Bin 6555 -> 0 bytes
 bookscraper/static/covers/流氓高手.jpg        | Bin 3625 -> 0 bytes
 bookscraper/static/covers/流氓高手II.jpg      | Bin 11957 -> 0 bytes
 bookscraper/static/css/bookcard.css           | 201 +++++++++++
 bookscraper/static/css/dashboard.css          | 171 +++++----
 bookscraper/static/js/dashboard.js            | 223 +++++++-----
 bookscraper/static/js/log_view.js             | 132 ++++---
 .../templates/components/bookcard.html        |  82 +++++
 bookscraper/templates/components/nav.html     |  18 +-
 .../components/registered_books.html          |  21 ++
 .../templates/components/url_input.html       |   2 +-
 .../templates/dashboard/dashboard.html        |   3 +
 .../templates/debug/inspect_state.html        |  88 +++++
 bookscraper/templates/debug/queues.html       |  91 +++++
 bookscraper/templates/layout.html             |   6 +
 bookscraper/worker/downloader.py              |  10 +-
 48 files changed, 2413 insertions(+), 859 deletions(-)
 create mode 100644 bookscraper/db/state_redis.py
 create mode 100644 bookscraper/db/state_sql.py
 rename bookscraper/scraper/{progress.py => _dep_progress.py} (100%)
 create mode 100644 bookscraper/scraper/logger_decorators.py
 delete mode 100644 bookscraper/scraper/tasks/progress_tasks.py
 rename bookscraper/{templates/components/book_card.html => scraper/utils/__init__.py} (100%)
 create mode 100644 bookscraper/scraper/utils/state_sync.py
 rename bookscraper/scraper/{ => utils}/utils.py (100%)
 delete mode 100644 bookscraper/static/covers/一剑朝天.jpg
 delete mode 100644 bookscraper/static/covers/从吞噬开始.jpg
 delete mode 100644 bookscraper/static/covers/流氓高手.jpg
 delete mode 100644 bookscraper/static/covers/流氓高手II.jpg
 create mode 100644 bookscraper/static/css/bookcard.css
 create mode 100644 bookscraper/templates/components/bookcard.html
 create mode 100644 bookscraper/templates/components/registered_books.html
 create mode 100644 bookscraper/templates/debug/inspect_state.html
 create mode 100644 bookscraper/templates/debug/queues.html
diff --git a/bookscraper/README.md b/bookscraper/README.md
index 103eded..ab2775e 100644
--- a/bookscraper/README.md
+++ b/bookscraper/README.md
@@ -135,6 +135,16 @@ docker compose down
 docker compose build
 docker compose up
+docker compose up -d
+
+docker compose build web && docker compose up web
+
+docker compose build worker_download && docker compose up worker_download
+
+docker compose up web
+docker compose build web
+docker compose restart web
+
 tar \
   --exclude="__pycache__" \
   --exclude="*/__pycache__/*" \
diff --git a/bookscraper/app.py b/bookscraper/app.py
index 0367408..ce713d6 100644
--- a/bookscraper/app.py
+++ b/bookscraper/app.py
@@ -6,29 +6,43 @@ from dotenv import load_dotenv
 load_dotenv()
 
 import os
-import redis
-from flask import Flask, render_template, request, jsonify, send_from_directory
+from flask import (
+    Flask,
+    render_template,
+    request,
+    jsonify,
+    send_from_directory,
+    redirect,
+    url_for,
+)
 
 print(">>> [WEB] Importing celery_app …")
 from celery_app import celery_app
-from db.db import init_db
 from celery.result import AsyncResult
 
+from db.db import init_db
+from db.repository import (
+    get_registered_books,
+    fetch_book,
+    fetch_all_books,
+    get_progress,
+)
+
 from scraper.logger import log_debug
 from scraper.abort import set_abort
-from scraper.progress import get_progress
 from scraper.ui_log import get_ui_logs, reset_ui_logs, get_ui_logs_delta
 from scraper.state import state as r
-
+from scraper.logger_decorators import logcall
+from scraper.utils.state_sync import sync_books_from_redis
 from scraper.services.init_service import InitService
-from db.repository import get_registered_books
 
 # INIT DB
 init_db()
 
 app = Flask(__name__)
 
+
 # =====================================================
 # STATIC FILE SERVING
 # =====================================================
@@ -36,6 +50,7 @@
 OUTPUT_ROOT = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
 
 
 @app.route("/output/<path:filename>")
+@logcall
 def serve_output(filename):
     return send_from_directory(OUTPUT_ROOT, filename, as_attachment=False)
@@ -46,22 +61,29 @@
 
 
 @app.route("/", methods=["GET"])
+@logcall
 def index():
-    return render_template("index.html")
+    return redirect(url_for("dashboard"))
 
 
 @app.route("/dashboard", methods=["GET"])
+@logcall
 def dashboard():
     logs_list = get_ui_logs() or []
+
+    # Filter hidden books ONLY for GUI
+    reg = [b for b in get_registered_books() if b.get("status") != "hidden"]
+
     return render_template(
         "dashboard/dashboard.html",
         books=list_active_books(),  # Redis
-        registered=get_registered_books(),  # SQLite INIT results
+        registered=reg,  # SQLite (filtered)
         logs=logs_list,
     )
 
 
 @app.route("/book/<book_id>")
+@logcall
 def book_detail(book_id):
     title = r.get(f"book:{book_id}:title") or book_id
     return render_template(
@@ -73,20 +95,19 @@
 
 
 # =====================================================
-# SECTION 2 — ACTION ROUTES (INIT, START, ABORT)
+# SECTION 2 — ACTION ROUTES (INIT, START, ABORT, HIDE)
 # =====================================================
 
-# CORRECT PATH — services/ is root-level
-
 @app.route("/init", methods=["POST"])
+@logcall
 def init_book():
     """
     INIT-flow:
     - user enters URL
-    - lightweight metadata fetch
+    - metadata fetch
    - insert into SQLite as 'registered'
-    - return dashboard HTML (NOT JSON)
+    - return dashboard
     """
 
     url = request.form.get("url", "").strip()
@@ -103,39 +124,63 @@
 
         result = InitService.execute(url)
         msg = f"Book registered: {result.get('title')}"
 
+        reg = [b for b in get_registered_books() if b.get("status") != "hidden"]
"hidden"] + return render_template( "dashboard/dashboard.html", message=msg, - books=list_active_books(), # Redis - registered=get_registered_books(), # SQLite INIT results + books=list_active_books(), + registered=reg, logs=get_ui_logs(), ) except Exception as e: log_debug(f"[INIT] ERROR: {e}") + reg = [b for b in get_registered_books() if b.get("status") != "hidden"] return render_template( "dashboard/dashboard.html", error=f"INIT mislukt: {e}", books=list_active_books(), - registered=get_registered_books(), + registered=reg, logs=get_ui_logs(), ) +@app.route("/hide/", methods=["POST"]) +@logcall +def hide_registered_book(book_id): + """ + Soft-delete/hide voor GUI. + De DB blijft intact. + """ + # try: + # hide_book(book_id) + # return redirect("/dashboard") + # # return jsonify({"status": "ok", "hidden": book_id}) + # except Exception as e: + # return jsonify({"status": "error", "message": str(e)}), 500 + + @app.route("/start", methods=["POST"]) +@logcall def start_scraping(): - url = request.form.get("url", "").strip() + """ + Start FULL scraping vanuit een geregistreerd INIT-record. + """ + book_id = request.form.get("book_id") + if not book_id: + return jsonify({"status": "error", "message": "book_id ontbreekt"}), 400 + + book = fetch_book(book_id) + if not book: + return jsonify({"status": "error", "message": "Boek niet gevonden"}), 404 + + url = book.get("book_url") if not url: - return render_template( - "dashboard/dashboard.html", - error="Geen URL opgegeven.", - books=list_active_books(), - registered=get_registered_books(), - logs=get_ui_logs(), - ) + return jsonify({"status": "error", "message": "book_url ontbreekt"}), 500 reset_ui_logs() - log_debug(f"[WEB] Scraping via Celery: {url}") + log_debug(f"[WEB] Starting FULL scrape for book_id={book_id}, url={url}") async_result = celery_app.send_task( "scraper.tasks.scraping.start_scrape_book", @@ -143,16 +188,19 @@ def start_scraping(): queue="scraping", ) + reg = [b for b in get_registered_books() if b.get("status") != "hidden"] + return render_template( "dashboard/dashboard.html", scraping_task_id=async_result.id, books=list_active_books(), - registered=get_registered_books(), + registered=reg, logs=get_ui_logs(), ) @app.route("/abort/", methods=["POST"]) +@logcall def abort_download(book_id): log_debug(f"[WEB] Abort requested for book: {book_id}") set_abort(book_id) @@ -165,27 +213,32 @@ def abort_download(book_id): @app.route("/api/books") +@logcall def api_books(): return jsonify(list_active_books()) @app.route("/api/book//status") +@logcall def api_book_status(book_id): return jsonify(getStatus(book_id)) @app.route("/api/book//logs") +@logcall def api_book_logs(book_id): logs = r.lrange(f"logs:{book_id}", 0, -1) or [] return jsonify(logs) @app.route("/progress/") +@logcall def progress(book_id): return jsonify(get_progress(book_id)) @app.route("/celery-result/") +@logcall def celery_result(task_id): result = AsyncResult(task_id, app=celery_app) if result.successful(): @@ -196,32 +249,66 @@ def celery_result(task_id): @app.route("/clear-logs", methods=["POST"]) +@logcall def clear_logs(): reset_ui_logs() - return jsonify({"status": "ok", "message": "UI logs cleared"}) + return jsonify({"status": "ok"}) @app.route("/logs", methods=["GET"]) +@logcall def logs(): + # LAST_LOG_INDEX vanuit de client (default = -1 bij eerste call) try: last_index = int(request.args.get("last_index", -1)) except: last_index = -1 - new_lines, total = get_ui_logs_delta(last_index) - return jsonify({"lines": new_lines, "total": total}) + # Haal 
+    all_logs = get_ui_logs() or []
+
+    # Delta: all lines with index > last_index
+    new_lines = []
+    new_last = last_index
+
+    for idx, line in enumerate(all_logs):
+        if idx > last_index:
+            new_lines.append(line)
+            new_last = idx
+
+    return jsonify({"lines": new_lines, "last": new_last})
 
 
 # =====================================================
 # SECTION 4 — DEBUG ROUTES
 # =====================================================
+@app.route("/debug/sync_state", methods=["GET"])
+def debug_sync_state():
+    results = sync_books_from_redis()
+    return {"status": "ok", "synced": results}
+
+
+from scraper.utils.state_sync import inspect_books_state
+
+
+@app.route("/debug/inspect_state", methods=["GET"])
+def debug_inspect_state():
+    """
+    Shows:
+    - raw SQLite values,
+    - raw Redis values,
+    - what the merged result WOULD be.
+    No writes happen.
+    """
+    results = inspect_books_state()
+    return render_template("debug/inspect_state.html", results=results)
 
 
 @app.route("/debug/redis-keys")
+@logcall
 def debug_redis_keys():
     cursor = 0
     results = {}
-
     while True:
         cursor, keys = r.scan(cursor, match="*", count=200)
         for k in keys:
@@ -231,22 +318,17 @@
                 results[k] = ""
         if cursor == 0:
             break
-
     return jsonify(results)
 
 
 # =====================================================
-# DB DEBUG: LIST ALL BOOKS FROM SQLITE
+# DB DEBUG
 # =====================================================
-from db.repository import fetch_all_books
 
 
 @app.route("/api/db/books")
+@logcall
 def api_db_books():
-    """
-    Return ALL books stored in SQLite — including INIT-only entries.
-    Useful to verify that /init wrote correct metadata.
-    """
     try:
         books = fetch_all_books()
         return jsonify({"status": "ok", "books": books})
@@ -254,11 +336,74 @@
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
+# =============================================
+# DEBUG QUEUE VIEW (HTML)
+# =============================================
+from flask import render_template
+from urllib.parse import urlparse
+import redis
+import os
+from celery_app import celery_app
+
+
+@app.route("/debug/queues")
+def debug_queues():
+    insp = celery_app.control.inspect()
+
+    workers_active = insp.active() or {}
+    workers_scheduled = insp.scheduled() or {}
+    workers_reserved = insp.reserved() or {}
+
+    # ---- Redis connection ----
+    redis_url = os.getenv("REDIS_BROKER")
+    parsed = urlparse(redis_url)
+
+    r = redis.Redis(
+        host=parsed.hostname,
+        port=parsed.port,
+        db=int(parsed.path.strip("/") or 0),
+        decode_responses=True,
+    )
+
+    queue_names = ["scraping", "controller", "download", "parse", "save", "audio"]
+
+    queues = []
+    for q in queue_names:
+        key = f"celery:{q}"
+        try:
+            queues.append(
+                {
+                    "name": q,
+                    "redis_key": key,
+                    "length": r.llen(key),
+                    "items": r.lrange(key, 0, 30),  # first 31 entries (0-30 inclusive)
+                }
+            )
+        except Exception as e:
+            queues.append(
+                {
+                    "name": q,
+                    "redis_key": key,
+                    "length": "ERR",
+                    "items": [str(e)],
+                }
+            )
+
+    return render_template(
+        "debug/queues.html",
+        queues=queues,
+        workers_active=workers_active,
+        workers_reserved=workers_reserved,
+        workers_scheduled=workers_scheduled,
+    )
+
+
 # =====================================================
 # SECTION 5 — INTERNAL HELPERS
 # =====================================================
+@logcall
 def getStatus(book_id):
     state = r.hgetall(f"book:{book_id}:state")
     status = state.get("status") or "unknown"
@@ -280,6 +425,7 @@
     }
 
 
+@logcall
 def list_active_books():
     books = []
     for key in r.scan_iter(match="book:*:state", count=1000):
diff --git a/bookscraper/audio_worker_local.py b/bookscraper/audio_worker_local.py
index 4affa93..229e2f0 100644
--- a/bookscraper/audio_worker_local.py
+++ b/bookscraper/audio_worker_local.py
@@ -54,6 +54,7 @@ def main():
         "-l",
         "INFO",
         "--pool=prefork",
+        "--concurrency=2",
     ]
 
     print("[AUDIO-LOCAL] Launching Celery via subprocess…")
diff --git a/bookscraper/celery_app.py b/bookscraper/celery_app.py
index adf08bd..2aa97ba 100644
--- a/bookscraper/celery_app.py
+++ b/bookscraper/celery_app.py
@@ -32,6 +32,17 @@ celery_app = Celery(
     ],
 )
 
+# >>>>> PLACE THIS HERE <<<<<
+celery_app.conf.update(
+    worker_redirect_stdouts_level="WARNING",
+    task_send_sent_event=False,
+    resultrepr_maxsize=0,
+    worker_log_format="[%(asctime)s: %(levelname)s/%(processName)s] %(message)s",
+    worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s] %(message)s",
+)
+# >>>>> UP TO HERE <<<<<
+
+
 celery_app.conf.task_routes = {
     "scraper.tasks.scraping.*": {"queue": "scraping"},
     "scraper.tasks.controller_tasks.*": {"queue": "controller"},
diff --git a/bookscraper/db/db.py b/bookscraper/db/db.py
index 4706d0e..f9d4363 100644
--- a/bookscraper/db/db.py
+++ b/bookscraper/db/db.py
@@ -60,9 +60,10 @@ def init_db():
             book_id TEXT PRIMARY KEY,
             title TEXT,
             author TEXT,
-
+            description TEXT,
             cover_url TEXT,
             cover_path TEXT,
+            book_url TEXT,
 
             chapters_total INTEGER,
@@ -72,6 +73,7 @@
             audio_done INTEGER DEFAULT 0,
 
             created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+            processdate DATETIME,
             last_update DATETIME
         );
         """
@@ -79,14 +81,38 @@
     conn.commit()
 
     # --------------------------------------------------------
-    # SCHEMA UPGRADE: add description column if missing
+    # SCHEMA UPGRADE UTIL
     # --------------------------------------------------------
+    def add_column(name, type_):
+        try:
+            conn.execute(f"ALTER TABLE books ADD COLUMN {name} {type_};")
+        except Exception:
+            pass  # column already exists
+
     cols = conn.execute("PRAGMA table_info(books);").fetchall()
     colnames = [c[1] for c in cols]
 
-    if "description" not in colnames:
-        conn.execute("ALTER TABLE books ADD COLUMN description TEXT;")
-        conn.commit()
+    # Existing: ensure new metadata fields exist
+    add_column("description", "TEXT")
+    add_column("cover_path", "TEXT")
+    add_column("book_url", "TEXT")
+
+    # --------------------------------------------------------
+    # NEW FIELDS — MATCH REDIS STATE MODEL (future-proof)
+    # These do NOT change logic, but enable repository snapshot sync.
+    # --------------------------------------------------------
+
+    # Download counters
+    add_column("chapters_download_done", "INTEGER DEFAULT 0")
+    add_column("chapters_download_skipped", "INTEGER DEFAULT 0")
+
+    # Audio counters
+    add_column("audio_skipped", "INTEGER DEFAULT 0")
+
+    # Optional future fields
+    add_column("audio_total", "INTEGER DEFAULT 0")
+
     conn.commit()
 
 
 # ------------------------------------------------------------
@@ -124,6 +150,5 @@ def _raw_get_book(book_id):
 
 def _raw_get_all_books():
     conn = get_db()
-    # unchanged
     cur = conn.execute("SELECT * FROM books ORDER BY created_at DESC;")
     return [dict(row) for row in cur.fetchall()]
diff --git a/bookscraper/db/repository.py b/bookscraper/db/repository.py
index b34d871..faefc94 100644
--- a/bookscraper/db/repository.py
+++ b/bookscraper/db/repository.py
@@ -1,60 +1,164 @@
 # ============================================================
 # File: db/repository.py
 # Purpose:
-#   High-level BookScraper database interface.
-#   This is the ONLY module Celery tasks and Flask should use.
+#   Unified façade for BookScraper database state.
 #
-# New additions for INIT-flow:
-#   - register_book()
-#   - update_book_after_full_scrape()
-#   - get_registered_books()
-#   - get_active_books()
-#
-# Existing functions remain unchanged for backward compatibility.
+# Responsibilities:
+#   - Route metadata → SQLite
+#   - Route counters → Redis (live) + SQLite (snapshot)
+#   - Provide a clean API for tasks and Flask UI
 # ============================================================
 
-from db.db import (
-    upsert_book,
-    _raw_get_book,
-    _raw_get_all_books,
-    get_db,
+from scraper.logger_decorators import logcall
+from logbus.publisher import log
+
+import redis
+import os
+import time
+
+# ============================================================
+# SQL low-level engines (snapshot storage)
+# ============================================================
+from db.state_sql import (
+    sql_fetch_book,
+    sql_fetch_all_books,
+    sql_set_status,
+    sql_set_chapters_total,
+    sql_register_book,
+    sql_update_book,
+    sql_inc_downloaded,
+    sql_inc_parsed,
+    sql_inc_audio_done,
+    sql_inc_audio_skipped,
+)
+
+# ============================================================
+# REDIS low-level engines (live counters)
+# ============================================================
+from db.state_redis import (
+    redis_set_status,
+    redis_set_chapters_total,
+    redis_inc_download_done,
+    redis_inc_download_skipped,
+    redis_inc_parsed_done,
+    redis_inc_audio_done,
+    redis_inc_audio_skipped,
 )
 
+# ============================================================
+# Redis setup for legacy progress paths
+# ============================================================
+REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
+_r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
+
+
+# ============================================================
+# INTERNAL — legacy progress helpers
+# ============================================================
+def _legacy_set_total(book_id, total):
+    _r.set(f"progress:{book_id}:total", total)
+
+
+def _legacy_inc_completed(book_id):
+    _r.incr(f"progress:{book_id}:completed")
+
+
+def _legacy_inc_skipped(book_id):
+    _r.incr(f"progress:{book_id}:skipped")
+
+
+def _legacy_inc_failed(book_id):
+    _r.incr(f"progress:{book_id}:failed")
 
-# ------------------------------------------------------------
-# FETCH OPERATIONS
-# ------------------------------------------------------------
+
+def _legacy_add_failed_chapter(book_id, chapter, reason):
+    entry = f"Chapter {chapter}: {reason}"
+    _r.rpush(f"progress:{book_id}:failed_list", entry)
+
+
+def _legacy_get_failed_list(book_id):
+    return _r.lrange(f"progress:{book_id}:failed_list", 0, -1)
+
+
+def _legacy_get_progress(book_id):
+    total = int(_r.get(f"progress:{book_id}:total") or 0)
+    completed = int(_r.get(f"progress:{book_id}:completed") or 0)
+    skipped = int(_r.get(f"progress:{book_id}:skipped") or 0)
+    failed = int(_r.get(f"progress:{book_id}:failed") or 0)
+    abort = _r.exists(f"abort:{book_id}") == 1
+    failed_list = _legacy_get_failed_list(book_id)
+
+    return {
+        "book_id": book_id,
+        "total": total,
+        "completed": completed,
+        "skipped": skipped,
+        "failed": failed,
+        "failed_list": failed_list,
+        "abort": abort,
+    }
+
+
+# ============================================================
+# PUBLIC — UI-ready legacy progress access
+# ============================================================
+@logcall
+def get_progress(book_id):
+    return _legacy_get_progress(book_id)
+
+
+@logcall
+def add_failed_chapter(book_id, chapter, reason):
+    _legacy_add_failed_chapter(book_id, chapter, reason)
+
+
+@logcall
+def get_failed_list(book_id):
+    return _legacy_get_failed_list(book_id)
+
+
+# ============================================================
+# FETCH OPERATIONS (SQLite snapshot)
+# ============================================================
+@logcall
 def fetch_book(book_id):
-    """Return a single book dict or None."""
-    return _raw_get_book(book_id)
+    return sql_fetch_book(book_id)
 
 
+@logcall
 def fetch_all_books():
-    """Return all books ordered newest → oldest."""
-    return _raw_get_all_books()
+    return sql_fetch_all_books()
 
 
 # ============================================================
-# NEW — INIT-FLOW SUPPORT
+# INIT-FLOW (SQLite metadata only)
 # ============================================================
+@logcall
+def register_book(
+    book_id,
+    title,
+    author=None,
+    description=None,
+    cover_url=None,
+    cover_path=None,
+    book_url=None,
+):
 
-
-def register_book(book_id, title, author=None, description=None, cover_url=None):
-    """
-    Create a new book entry with initial metadata.
-    Called when user enters a URL and presses INIT.
-    """
     fields = {
         "title": title,
         "author": author,
         "description": description,
         "cover_url": cover_url,
+        "cover_path": cover_path,
+        "book_url": book_url,
         "chapters_total": 0,
         "status": "registered",
     }
 
-    upsert_book(book_id, **fields)
+    log(f"[DB] Registering new book={book_id} title='{title}'")
+    sql_register_book(book_id, fields)
 
 
+@logcall
 def update_book_after_full_scrape(
     book_id,
     title=None,
@@ -63,10 +167,6 @@
     cover_url=None,
     chapters_total=None,
 ):
-    """
-    Called after a FULL scrape when chapters are known.
-    Moves the book into 'active' state.
-    """
     fields = {}
 
     if title is not None:
@@ -82,94 +182,122 @@
 
     fields["status"] = "active"
 
-    upsert_book(book_id, **fields)
+    log(f"[DB] update full scrape metadata book={book_id}")
+    sql_update_book(book_id, fields)
 
 
+# ============================================================
+# ACTIVE BOOK LISTS
+# ============================================================
+@logcall
 def get_registered_books():
-    """
-    Return books registered but not yet scraped.
-    """
-    conn = get_db()
-    cur = conn.execute(
-        """SELECT * FROM books WHERE status='registered'
-           ORDER BY created_at DESC"""
-    )
-    return [dict(row) for row in cur.fetchall()]
 
+    all_books = sql_fetch_all_books()
+    return [b for b in all_books if b.get("status") == "registered"]
+
 
+@logcall
 def get_active_books():
-    """
-    Return books currently in progress.
- """ - conn = get_db() - cur = conn.execute( - """SELECT * FROM books - WHERE status IN ('active', 'downloading') - ORDER BY created_at DESC""" - ) - return [dict(row) for row in cur.fetchall()] - - -# ------------------------------------------------------------ -# BOOK CREATION / METADATA (existing) -# ------------------------------------------------------------ -def create_or_update_book( - book_id, - title=None, - author=None, - chapters_total=None, - cover_url=None, - cover_path=None, - status=None, -): - fields = {} + all_books = sql_fetch_all_books() + log(f"[DB] Fetched all books for active filter, total={len(all_books)}") + return [b for b in all_books if b.get("status") in ("active", "downloading")] - if title is not None: - fields["title"] = title - if author is not None: - fields["author"] = author - if chapters_total is not None: - fields["chapters_total"] = chapters_total - if cover_url is not None: - fields["cover_url"] = cover_url - if cover_path is not None: - fields["cover_path"] = cover_path - if status is not None: - fields["status"] = status - if fields: - upsert_book(book_id, **fields) +# ============================================================ +# STATUS MANAGEMENT +# ============================================================ +@logcall +def set_status(book_id, status): + log(f"[DB] Setting status for {book_id} to '{status}'") + redis_set_status(book_id, status) + sql_set_status(book_id, status) -# ------------------------------------------------------------ -# STATUS MANAGEMENT (existing) -# ------------------------------------------------------------ -def set_status(book_id, status): - upsert_book(book_id, status=status) +# ============================================================ +# CHAPTER TOTALS +# ============================================================ +@logcall +def set_chapters_total(book_id, total): + log(f"[DB] Setting chapter total for {book_id} to {total}") + redis_set_chapters_total(book_id, total) + sql_set_chapters_total(book_id, total) + _legacy_set_total(book_id, total) # integrate legacy progress + + +# ============================================================ +# COUNTERS — DOWNLOAD +# ============================================================ +@logcall +def inc_download_done(book_id, amount=1): + log(f"[DB] Incrementing download done for {book_id} by {amount}") + redis_inc_download_done(book_id, amount) + sql_inc_downloaded(book_id, amount) + _legacy_inc_completed(book_id) + + +@logcall +def inc_download_skipped(book_id, amount=1): + log(f"[DB] Incrementing download skipped for {book_id} by {amount}") + redis_inc_download_skipped(book_id, amount) + _legacy_inc_skipped(book_id) + + +# ============================================================ +# COUNTERS — PARSE +# ============================================================ +@logcall +def inc_parsed_done(book_id, amount=1): + log(f"[DB] Incrementing parsed done for {book_id} by {amount}") + redis_inc_parsed_done(book_id, amount) + sql_inc_parsed(book_id, amount) -# ------------------------------------------------------------ -# INCREMENTING COUNTERS (existing — backward compat only) -# ------------------------------------------------------------ +# ============================================================ +# COUNTERS — AUDIO +# ============================================================ +# ============================================================ +# COUNTERS — AUDIO SKIPPED +# ============================================================ +@logcall +def 
+def inc_audio_skipped(book_id, amount=1):
+    log(f"[DB] Incrementing audio skipped for {book_id} by {amount}")
+    # Both sinks exist as of this patch: SQLite snapshot column + Redis live counter.
+    sql_inc_audio_skipped(book_id, amount)
+    redis_inc_audio_skipped(book_id, amount)
+
+
+@logcall
+def inc_audio_done(book_id, amount=1):
+    log(f"[DB] Incrementing audio done for {book_id} by {amount}")
+    redis_inc_audio_done(book_id, amount)
+    sql_inc_audio_done(book_id, amount)
+
+
+# ============================================================
+# BACKWARDS COMPATIBILITY SHIMS (old task API)
+# ============================================================
+
+
+@logcall
 def inc_downloaded(book_id, amount=1):
-    book = _raw_get_book(book_id)
-    if not book:
-        return
-    cur = book.get("downloaded", 0) or 0
-    upsert_book(book_id, downloaded=cur + amount)
+    """
+    Old name used by older tasks.
+    Redirects to the new unified counter.
+    """
+    return inc_download_done(book_id, amount)
 
 
+@logcall
 def inc_parsed(book_id, amount=1):
-    book = _raw_get_book(book_id)
-    if not book:
-        return
-    cur = book.get("parsed", 0) or 0
-    upsert_book(book_id, parsed=cur + amount)
+    """
+    Old name used by older tasks.
+    """
+    return inc_parsed_done(book_id, amount)
 
 
-def inc_audio_done(book_id, amount=1):
-    book = _raw_get_book(book_id)
-    if not book:
-        return
-    cur = book.get("audio_done", 0) or 0
-    upsert_book(book_id, audio_done=cur + amount)
+@logcall
+def inc_audio_done_legacy(book_id, amount=1):
+    """
+    Old audio name used by older tasks.
+    """
+    return inc_audio_done(book_id, amount)
diff --git a/bookscraper/db/state_redis.py b/bookscraper/db/state_redis.py
new file mode 100644
index 0000000..232d7af
--- /dev/null
+++ b/bookscraper/db/state_redis.py
@@ -0,0 +1,79 @@
+# ============================================================
+# File: db/state_redis.py
+# Purpose:
+#   Low-level Redis counters/state for BookScraper.
+#   Used ONLY by the db.repository façade.
+# ============================================================
+
+import os
+import time
+import redis
+
+from logbus.publisher import log
+
+REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
+r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
+
+
+# ------------------------------------------------------------
+# STATUS
+# ------------------------------------------------------------
+def redis_set_status(book_id: str, status: str):
+    key = f"book:{book_id}:state"
+    r.hset(key, "status", status)
+    r.hset(key, "last_update", int(time.time()))
+
+
+# ------------------------------------------------------------
+# TOTAL CHAPTERS
+# ------------------------------------------------------------
+def redis_set_chapters_total(book_id: str, total: int):
+    key = f"book:{book_id}:state"
+    r.hset(key, "chapters_total", total)
+    r.hset(key, "last_update", int(time.time()))
+
+
+# ------------------------------------------------------------
+# DOWNLOAD COUNTERS
+# ------------------------------------------------------------
+def redis_inc_download_done(book_id: str, amount: int = 1):
+    key = f"book:{book_id}:state"
+    r.hincrby(key, "chapters_download_done", amount)
+    r.hset(key, "last_update", int(time.time()))
+
+
+def redis_inc_download_skipped(book_id: str, amount: int = 1):
+    log(f"[DB-REDIS] Incrementing download skipped for {book_id} by {amount}")
+    key = f"book:{book_id}:state"
+    r.hincrby(key, "chapters_download_skipped", amount)
+    r.hset(key, "last_update", int(time.time()))
+
+
+# ------------------------------------------------------------
+# PARSE COUNTERS
+# ------------------------------------------------------------
+def redis_inc_parsed_done(book_id: str, amount: int = 1):
+    key = f"book:{book_id}:state"
+    r.hincrby(key, "chapters_parsed_done", amount)
+    r.hset(key, "last_update", int(time.time()))
+
+
+# ------------------------------------------------------------
+# AUDIO COUNTERS
+# ------------------------------------------------------------
+def redis_inc_audio_done(book_id: str, amount: int = 1):
+    log(f"[DB-REDIS] Incrementing audio done for {book_id} by {amount}")
+    key = f"book:{book_id}:state"
+    r.hincrby(key, "audio_done", amount)
+    r.hset(key, "last_update", int(time.time()))
+
+
+def redis_inc_audio_skipped(book_id: str, amount: int = 1):
+    """
+    New: count skipped audio chapters (timeouts, pre-existing files, abort, etc.).
+    The repository façade mirrors this into the SQLite snapshot as well.
+    """
+    log(f"[DB-REDIS] Incrementing audio skipped for {book_id} by {amount}")
+    key = f"book:{book_id}:state"
+    r.hincrby(key, "audio_skipped", amount)
+    r.hset(key, "last_update", int(time.time()))
diff --git a/bookscraper/db/state_sql.py b/bookscraper/db/state_sql.py
new file mode 100644
index 0000000..ec7626a
--- /dev/null
+++ b/bookscraper/db/state_sql.py
@@ -0,0 +1,165 @@
+# ============================================================
+# File: db/state_sql.py
+# Purpose:
+#   Low-level SQLite snapshot layer for BookScraper metadata.
+#   Used ONLY through the db.repository façade.
+# ============================================================
+
+import sqlite3
+import os
+
+from logbus.publisher import log
+
+DB_PATH = os.getenv("BOOKSCRAPER_DB", "/app/db/books.db")
+
+
+# ------------------------------------------------------------
+# INTERNAL HELPERS
+# ------------------------------------------------------------
+def _connect():
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+# ------------------------------------------------------------
+# FETCH
+# ------------------------------------------------------------
+def sql_fetch_book(book_id):
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute("SELECT * FROM books WHERE book_id = ?", (book_id,))
+    row = cur.fetchone()
+    conn.close()
+    return dict(row) if row else None
+
+
+def sql_fetch_all_books():
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute("SELECT * FROM books ORDER BY rowid DESC")
+    rows = cur.fetchall()
+    conn.close()
+    return [dict(r) for r in rows]
+
+
+# ------------------------------------------------------------
+# REGISTER / UPDATE
+# ------------------------------------------------------------
+def sql_register_book(book_id, fields: dict):
+    conn = _connect()
+    cur = conn.cursor()
+
+    cols = ", ".join(["book_id"] + list(fields.keys()))
+    placeholders = ", ".join(["?"] * (1 + len(fields)))
+    values = [book_id] + list(fields.values())
+
+    cur.execute(
+        f"INSERT OR REPLACE INTO books ({cols}) VALUES ({placeholders})", values
+    )
+    conn.commit()
+    conn.close()
+
+
+def sql_update_book(book_id, fields: dict):
+    if not fields:
+        return
+
+    conn = _connect()
+    cur = conn.cursor()
+
+    set_clause = ", ".join([f"{k} = ?" for k in fields])
+    params = list(fields.values()) + [book_id]
+
+    cur.execute(f"UPDATE books SET {set_clause} WHERE book_id = ?", params)
+    conn.commit()
+    conn.close()
+
+
+# ------------------------------------------------------------
+# STATUS
+# ------------------------------------------------------------
+def sql_set_status(book_id, status: str):
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute("UPDATE books SET status = ? WHERE book_id = ?", (status, book_id))
+    conn.commit()
+    conn.close()
+
+
+# ------------------------------------------------------------
+# CHAPTER TOTAL (snapshot)
+# ------------------------------------------------------------
+def sql_set_chapters_total(book_id, total: int):
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute(
+        "UPDATE books SET chapters_total = ? WHERE book_id = ?", (total, book_id)
+    )
+    conn.commit()
+    conn.close()
+
+
+# ------------------------------------------------------------
+# COUNTERS (SNAPSHOT-ONLY)
+# ------------------------------------------------------------
+def sql_inc_downloaded(book_id, amount=1):
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute(
+        """
+        UPDATE books
+        SET downloaded = COALESCE(downloaded,0) + ?
+        WHERE book_id = ?
+        """,
+        (amount, book_id),
+    )
+    conn.commit()
+    conn.close()
+
+
+def sql_inc_parsed(book_id, amount=1):
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute(
+        """
+        UPDATE books
+        SET parsed = COALESCE(parsed,0) + ?
+        WHERE book_id = ?
+        """,
+        (amount, book_id),
+    )
+    conn.commit()
+    conn.close()
+
+
+def sql_inc_audio_done(book_id, amount=1):
+    log(f"[DB-SQL] Incrementing audio done for {book_id} by {amount}")
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute(
+        """
+        UPDATE books
+        SET audio_done = COALESCE(audio_done,0) + ?
+        WHERE book_id = ?
+ """, + (amount, book_id), + ) + conn.commit() + conn.close() + + +def sql_inc_audio_skipped(book_id, amount=1): + log(f"[DB-SQL] Incrementing audio skipped for {book_id} by {amount}") + conn = _connect() + cur = conn.cursor() + cur.execute( + """ + UPDATE books + SET audio_skipped = COALESCE(audio_skipped,0) + ? + WHERE book_id = ? + """, + (amount, book_id), + ) + conn.commit() + conn.close() diff --git a/bookscraper/logbus/publisher.py b/bookscraper/logbus/publisher.py index d87695a..ed699e7 100644 --- a/bookscraper/logbus/publisher.py +++ b/bookscraper/logbus/publisher.py @@ -1,9 +1,31 @@ # logbus/publisher.py import logging +import os logger = logging.getLogger("logbus") +logger.setLevel(logging.WARNING) + +# ============================================================ +# FILE LOGGER — log.txt in BOOKSCRAPER_OUTPUT_DIR +# ============================================================ +try: + root = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output") + os.makedirs(root, exist_ok=True) + + file_path = os.path.join(root, "log.txt") + + file_handler = logging.FileHandler(file_path, mode="a", encoding="utf-8") + file_formatter = logging.Formatter("%(message)s") # exact zoals input + file_handler.setFormatter(file_formatter) + + logger.addHandler(file_handler) + +except Exception: + # Logging naar file mag nooit de app laten crashen + pass + def log(message: str): """ diff --git a/bookscraper/scraper/progress.py b/bookscraper/scraper/_dep_progress.py similarity index 100% rename from bookscraper/scraper/progress.py rename to bookscraper/scraper/_dep_progress.py diff --git a/bookscraper/scraper/abort.py b/bookscraper/scraper/abort.py index 2df7880..2c67f75 100644 --- a/bookscraper/scraper/abort.py +++ b/bookscraper/scraper/abort.py @@ -1,6 +1,8 @@ import os import redis +from scraper.logger_decorators import logcall + # GUI log (non-breaking) from scraper.ui_log import push_ui diff --git a/bookscraper/scraper/book_scraper.py b/bookscraper/scraper/book_scraper.py index 52c5bd7..a34976d 100644 --- a/bookscraper/scraper/book_scraper.py +++ b/bookscraper/scraper/book_scraper.py @@ -1,21 +1,66 @@ # ============================================================ # File: scraper/book_scraper.py # Purpose: -# Backwards-compatible wrapper giving same API as before. -# Uses the new engine under the hood. +# Backwards-compatible wrapper giving the SAME public API +# as the old BookScraper, but internally uses ScrapeEngine. +# +# execute() → full metadata + chapterlist +# +# (* Chapter downloading komt later in ScrapeEngine, +# maar deze wrapper hoeft NIET aangepast te worden.) # ============================================================ -from scraper.engine.parser import extract_metadata_full +from scraper.logger_decorators import logcall +from scraper.services.scrape_engine import ScrapeEngine class BookScraper: - def __init__(self, site_scraper, url): + """ + Backwards-compatible BookScraper façade. + + In het oude systeem deed BookScraper ALLES: + - metadata ophalen + - cover ophalen + - hoofdstukkenlijst + - hoofdstukken downloaden + - volume folders + - skip logic + + In het nieuwe systeem is dát opgesplitst: + + ScrapeEngine → metadata / chapterlist / download engine (in ontwikkeling) + BookScraper → behoudt dezelfde API als voorheen + + Daardoor kunnen Celery-tasks en oudere modules blijven werken + zonder refactor-chaos. 
+ """ + + @logcall + def __init__(self, site_scraper, url: str): self.site = site_scraper self.url = url + @logcall def execute(self): """ - Backwards compatible full scrape: - returns {title, author, description, cover_url, chapters, book_url} + Public legacy API. + Retourneert metadata + chapters EXACT zoals de oude BookScraper + vóór downloadfase. + + Dit is belangrijk: + - INIT-flow gebruikt metadata only + - scraping tasks gebruiken chapterlist """ - return extract_metadata_full(self.url, self.site) + + data = ScrapeEngine.fetch_metadata_and_chapters(self.site, self.url) + + # Legacy output structuur volledig repliceren: + return { + "title": data.get("title"), + "author": data.get("author"), + "description": data.get("description"), + "cover_url": data.get("cover_url"), + "chapters": data.get("chapters", []), + "chapters_total": data.get("chapters_total", 0), + "book_url": data.get("book_url"), + } diff --git a/bookscraper/scraper/download_controller.py b/bookscraper/scraper/download_controller.py index f3ad53f..8f5c9ed 100644 --- a/bookscraper/scraper/download_controller.py +++ b/bookscraper/scraper/download_controller.py @@ -16,14 +16,6 @@ import os import requests import shutil from scraper.abort import abort_requested # DEBUG allowed -from db.repository import create_or_update_book - -# NEW: Redis State Model (C&U) -from scraper.progress import ( - init_book_state, - set_status, - set_chapter_total, -) class DownloadController: diff --git a/bookscraper/scraper/logger_decorators.py b/bookscraper/scraper/logger_decorators.py new file mode 100644 index 0000000..095abc1 --- /dev/null +++ b/bookscraper/scraper/logger_decorators.py @@ -0,0 +1,33 @@ +# ============================================================ +# File: scraper/logger_decorators.py +# Purpose: Function-call logging decorator +# ============================================================ + +from functools import wraps +from scraper.logger import log_debug + + +def logcall(func): + """ + Decorator: log function name + arguments every time it's called. + Usage: @logcall above any function. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + # Naam van de functie + name = func.__qualname__ + + # Eerste logregel vóór uitvoering + # log_debug(f"[CALL] {name} args={args} kwargs={kwargs}") + log_debug(f"[CALL] {name} args={args}") + # log_debug(f"[CALL] {name}") + + result = func(*args, **kwargs) + + # Log ná uitvoering + # log_debug(f"[RETURN] {name} → {result}") + + return result + + return wrapper diff --git a/bookscraper/scraper/scriptgen.py b/bookscraper/scraper/scriptgen.py index bd9c148..4f94d68 100644 --- a/bookscraper/scraper/scriptgen.py +++ b/bookscraper/scraper/scriptgen.py @@ -6,6 +6,8 @@ import os import stat from logbus.publisher import log +from scraper.logger_decorators import logcall + TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates") diff --git a/bookscraper/scraper/services/cover_service.py b/bookscraper/scraper/services/cover_service.py index 8392947..3df193c 100644 --- a/bookscraper/scraper/services/cover_service.py +++ b/bookscraper/scraper/services/cover_service.py @@ -5,12 +5,13 @@ import os import requests from logbus.publisher import log +from typing import Optional class CoverService: @staticmethod - def download_main_cover(cover_url: str, book_id: str) -> str | None: + def download_main_cover(cover_url: str, book_id: str) -> Optional[str]: """ Downloads cover image into: static/covers/.jpg. Returns local path or None. 
diff --git a/bookscraper/scraper/services/init_service.py b/bookscraper/scraper/services/init_service.py
index 91bf5bf..601c748 100644
--- a/bookscraper/scraper/services/init_service.py
+++ b/bookscraper/scraper/services/init_service.py
@@ -16,10 +16,13 @@
 from scraper.services.cover_service import CoverService
 from db.repository import register_book
 
+from scraper.logger_decorators import logcall
+
 
 class InitService:
 
     @staticmethod
+    @logcall
     def derive_book_id(url: str) -> str:
         """
         PTWXZ URL format ends with /{id}.html.
@@ -31,16 +34,20 @@
         return url.replace("/", "_")
 
     @staticmethod
+    @logcall
     def execute(url: str) -> dict:
         """
         Main INIT-flow entry point.
         Returns complete metadata + registration info.
         """
 
-        # 1) Determine which BookSite applies
+        # 1) Determine site
         site = SiteResolver.resolve(url)
 
-        # 2) Metadata only (no chapters)
+        # 2) Derive book_id
+        book_id = InitService.derive_book_id(url)
+
+        site.book_id = book_id
+
+        # 3) Metadata only
         meta = ScrapeEngine.fetch_metadata_only(site, url)
 
         title = meta.get("title") or "Unknown"
@@ -48,27 +55,27 @@
         description = meta.get("description")
         cover_url = meta.get("cover_url")
 
-        # 3) Determine book_id
-        book_id = InitService.derive_book_id(url)
+        # 4) Download UI cover (NEW: capture the returned local path)
+        cover_path = CoverService.download_main_cover(cover_url, book_id)
 
-        # 4) SQLite registration
+        # 5) SQLite registration INCLUDING cover_path ← ★ FIX
         register_book(
             book_id=book_id,
             title=title,
             author=author,
             description=description,
             cover_url=cover_url,
+            cover_path=cover_path,  # ← ★ IMPORTANT
+            book_url=url,
         )
 
-        # 5) Download UI cover
-        CoverService.download_main_cover(cover_url, book_id)
-
-        # 6) Structured output for UI
+        # 6) Output for UI
         return {
             "book_id": book_id,
             "title": title,
             "author": author,
             "description": description,
             "cover_url": cover_url,
+            "cover_path": cover_path,  # ← handy for the UI
             "status": "registered",
         }
diff --git a/bookscraper/scraper/services/scrape_engine.py b/bookscraper/scraper/services/scrape_engine.py
index 35df5ac..041d0f3 100644
--- a/bookscraper/scraper/services/scrape_engine.py
+++ b/bookscraper/scraper/services/scrape_engine.py
@@ -1,33 +1,287 @@
 # ============================================================
 # File: scraper/services/scrape_engine.py
 # Purpose:
-#   Provide unified scraping methods for INIT-flow.
-#   Reuses BookScraper internally with ZERO duplication.
+#   Unified scraping engine for INIT-flow and Celery tasks.
+#   All functions are fully logged via @logcall.
 # ============================================================
 
-from scraper.book_scraper import BookScraper
+import os
+import time
+import re
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse
+
+from logbus.publisher import log
+from scraper.logger import log_debug
+from scraper.logger_decorators import logcall
+from scraper.utils.utils import load_replacements
 
 
 class ScrapeEngine:
     """
-    Adapter layer around BookScraper.
-    Allows INIT-flow to fetch ONLY metadata (no chapters).
+    Central scraping engine.
+    Metadata + chapterlist scraping.
+    All methods logged with @logcall.
""" + # ------------------------------------------------------------ + # REPLACEMENTS LOADER + # ------------------------------------------------------------ @staticmethod - def fetch_metadata_only(site, url: str) -> dict: + @logcall + def _apply_replacements(site): + fp = os.path.join(os.getcwd(), "replacements.txt") + extra = load_replacements(fp) + if not hasattr(site, "replacements"): + site.replacements = {} + site.replacements.update(extra) + return True + + # ------------------------------------------------------------ + # RATE LIMITER + # ------------------------------------------------------------ + MIN_DELAY = 1.0 / float(os.getenv("MAX_DOWNLOADS_PER_SEC", "1")) + + @staticmethod + @logcall + def _throttle(last_time=[0]): + now = time.time() + elapsed = now - last_time[0] + if elapsed < ScrapeEngine.MIN_DELAY: + time.sleep(ScrapeEngine.MIN_DELAY - elapsed) + last_time[0] = time.time() + return True + + # ------------------------------------------------------------ + # HTTP GET + # ------------------------------------------------------------ + @staticmethod + @logcall + def _get_doc(url: str, site): + attempt = 1 + while True: + ScrapeEngine._throttle() + log_debug(f"[SCRAPER] GET {url} (attempt {attempt})") + + try: + resp = requests.get( + url, + headers={"User-Agent": "Mozilla/5.0"}, + timeout=10, + ) + except Exception as e: + log_debug(f"Network error {e} → retry {attempt + 1}s") + time.sleep(attempt + 1) + attempt += 1 + continue + + code = resp.status_code + + if code == 200: + resp.encoding = getattr(site, "encoding", "utf-8") + return BeautifulSoup(resp.text, "lxml") + + if code == 429: + cooldown = 60 + log_debug("429 detected — cooldown 60s") + for i in range(cooldown, 0, -1): + log_debug(f" cooldown {i}s…") + time.sleep(1) + attempt += 1 + continue + + if code in (403, 500): + wait = min(5 * attempt, 30) + log_debug(f"HTTP {code} → retry in {wait}s") + time.sleep(wait) + attempt += 1 + continue + + wait = attempt + 1 + log_debug(f"Unexpected HTTP {code} → sleep {wait}s") + time.sleep(wait) + attempt += 1 + + # ------------------------------------------------------------ + # PARSER HELPERS + # ------------------------------------------------------------ + @staticmethod + @logcall + def _parse_title(soup): + h1 = soup.find("h1") + return h1.get_text(strip=True) if h1 else "UnknownTitle" + + @staticmethod + @logcall + def _parse_author(soup): + td = soup.find("td", string=lambda t: t and "作" in t) + if td and ":" in td.get_text(): + return td.get_text(strip=True).split(":")[1] + return "UnknownAuthor" + + @staticmethod + @logcall + def _parse_description(soup): + span = soup.find("span", string=lambda t: t and "内容简介" in t) + if not span: + return "" + parts = [] + for sib in span.next_siblings: + if getattr(sib, "name", None) == "span": + break + txt = ( + sib.get_text(strip=True) + if hasattr(sib, "get_text") + else str(sib).strip() + ) + if txt: + parts.append(txt) + return "\n".join(parts) + + # ------------------------------------------------------------ + # COVER PARSER + # ------------------------------------------------------------ + @staticmethod + @logcall + def _parse_cover(soup, site): """ - Execute BookScraper but return ONLY metadata. - Chapters are intentionally removed. 
+ Vind cover door book_id substring matching: + - haal book_id uit site.url + - zoek IMG-tags waarvan filename book_id bevat + - kies kortste filename als beste match """ - scraper = BookScraper(site, url) - result = scraper.execute() # returns full metadata + chapters + try: + parsed = urlparse(site.url) + m = re.search(r"/(\d+)\.html$", parsed.path) + if m: + book_id = m.group(1) + else: + book_id = parsed.path.rstrip("/").split("/")[-1] + except Exception: + return None + + imgs = soup.find_all("img", src=True) + candidates = [] + + for img in imgs: + src = img["src"].strip() + filename = os.path.basename(src) + if book_id in filename: + candidates.append((filename, src)) + + if not candidates: + return None + + candidates.sort(key=lambda t: len(t[0])) # kortste filename wint + best_src = candidates[0][1] + + return urljoin(site.root, best_src) + + # ------------------------------------------------------------ + # RESOLVE CHAPTER PAGE + # ------------------------------------------------------------ + @staticmethod + @logcall + def _resolve_chapter_page(soup, site): + node = soup.select_one( + "html > body > div:nth-of-type(6) > div:nth-of-type(2) > div > table" + ) + if not node: + raise ValueError("Could not locate chapter list base node") + + href = node.select_one("a").get("href") + url = urljoin(site.root, href) + + parsed = urlparse(url) + basepath = parsed.path.rsplit("/", 1)[0] + "/" + chapter_base = f"{parsed.scheme}://{parsed.netloc}{basepath}" + + return url, chapter_base + + # ------------------------------------------------------------ + # PARSE CHAPTER LINKS + # ------------------------------------------------------------ + @staticmethod + @logcall + def _parse_chapter_links(soup, chapter_base, selector): + cont = soup.select_one(selector) + if not cont: + return [] + + items = cont.select("ul li a[href]") + chapters = [] + idx = 1 + + for a in items: + href = a.get("href") + if not href.endswith(".html"): + continue + title = a.get_text(strip=True) + full = urljoin(chapter_base, href) + chapters.append({"num": idx, "title": title, "url": full}) + idx += 1 + + return chapters + + # ============================================================ + # PUBLIC APIS + # ============================================================ + + @staticmethod + @logcall + def fetch_metadata_only(site, url: str) -> dict: + ScrapeEngine._apply_replacements(site) + soup = ScrapeEngine._get_doc(url, site) + site.url = url # NODIG voor cover parsing - # Strip chapterlist — INIT-flow should not fetch them return { - "title": result.get("title"), - "author": result.get("author"), - "description": result.get("description"), - "cover_url": result.get("cover_url"), + "title": ScrapeEngine._parse_title(soup), + "author": ScrapeEngine._parse_author(soup), + "description": ScrapeEngine._parse_description(soup), + "cover_url": ScrapeEngine._parse_cover(soup, site), "book_url": url, } + + @staticmethod + @logcall + def fetch_metadata_and_chapters(site, url: str) -> dict: + ScrapeEngine._apply_replacements(site) + + soup = ScrapeEngine._get_doc(url, site) + site.url = url + + title = ScrapeEngine._parse_title(soup) + author = ScrapeEngine._parse_author(soup) + desc = ScrapeEngine._parse_description(soup) + cover = ScrapeEngine._parse_cover(soup, site) + + chapter_page_url, chapter_base = ScrapeEngine._resolve_chapter_page(soup, site) + chapter_soup = ScrapeEngine._get_doc(chapter_page_url, site) + + chapters = ScrapeEngine._parse_chapter_links( + chapter_soup, chapter_base, site.chapter_list_selector + ) + + 
+        return {
+            "title": title,
+            "author": author,
+            "description": desc,
+            "cover_url": cover,
+            "chapters": chapters,
+            "chapters_total": len(chapters),
+            "book_url": url,
+        }
+
+    @staticmethod
+    @logcall
+    def fetch_chapterlist(site, url: str):
+        ScrapeEngine._apply_replacements(site)
+        soup = ScrapeEngine._get_doc(url, site)
+
+        chapter_page_url, chapter_base = ScrapeEngine._resolve_chapter_page(soup, site)
+        chapter_soup = ScrapeEngine._get_doc(chapter_page_url, site)
+
+        return ScrapeEngine._parse_chapter_links(
+            chapter_soup, chapter_base, site.chapter_list_selector
+        )
diff --git a/bookscraper/scraper/sites/base.py b/bookscraper/scraper/sites/base.py
index b75f414..a2ec861 100644
--- a/bookscraper/scraper/sites/base.py
+++ b/bookscraper/scraper/sites/base.py
@@ -6,6 +6,7 @@
 
 from abc import ABC, abstractmethod
 from bs4 import BeautifulSoup
+from typing import Optional
 
 
 class SiteScraper(ABC):
@@ -39,7 +40,7 @@
     def parse_description(self, soup: BeautifulSoup) -> str: ...
 
     @abstractmethod
-    def parse_cover(self, soup: BeautifulSoup, url: str) -> str | None: ...
+    def parse_cover(self, soup: BeautifulSoup, url: str) -> Optional[str]: ...
 
     # --------------------------
     # Chapter extraction
diff --git a/bookscraper/scraper/sites/piaotian.py b/bookscraper/scraper/sites/piaotian.py
index 95e430e..9ea89ff 100644
--- a/bookscraper/scraper/sites/piaotian.py
+++ b/bookscraper/scraper/sites/piaotian.py
@@ -9,6 +9,7 @@
 from scraper.sites.base import SiteScraper
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 import re
+from typing import Optional
 
 
 class PiaotianScraper(SiteScraper):
@@ -53,7 +54,7 @@
     # COVER PARSING
     # (exactly your BookScraper._parse_cover logic)
     # ------------------------------------------------------------
-    def parse_cover(self, soup: BeautifulSoup, url: str) -> str | None:
+    def parse_cover(self, soup: BeautifulSoup, url: str) -> Optional[str]:
         # Extract book_id from URL
         m = re.search(r"/(\d+)\.html$", url)
         if not m:
diff --git a/bookscraper/scraper/tasks/audio_tasks.py b/bookscraper/scraper/tasks/audio_tasks.py
index 18fcf55..82cd692 100644
--- a/bookscraper/scraper/tasks/audio_tasks.py
+++ b/bookscraper/scraper/tasks/audio_tasks.py
@@ -1,5 +1,8 @@
 # ============================================================
 # File: scraper/tasks/audio_tasks.py
+# Purpose: Convert chapter text files into audio using macOS
+#          “say”, with Redis-based slot control.
+# Updated: now uses db.repository for audio counters.
 # ============================================================
 
 from celery_app import celery_app
@@ -7,61 +10,80 @@
 from logbus.publisher import log
 
 import os
 import subprocess
 import time
+import socket
 
-from scraper.progress import inc_audio_done, inc_audio_skipped
-
-# from db.repository import inc_audio_done
 from scraper.abort import abort_requested
+from scraper.logger_decorators import logcall
 
 from redis import Redis
 from urllib.parse import urlparse
 
-# Pick the local redis when present, else the standard backend
-redis_url = os.getenv("REDIS_BACKEND_LOCAL") or os.getenv("REDIS_BACKEND")
+# NEW — unified repository façade
+from db.repository import (
+    inc_audio_done,
+    inc_audio_skipped,
+)
 
-parsed = urlparse(redis_url)
+HOST = socket.gethostname()
 
 # ------------------------------------------------------------
-# REGULAR REDIS CLIENT (slots, file checks, dstate)
+# REDIS CLIENT SETUP
 # ------------------------------------------------------------
+redis_url = os.getenv("REDIS_BACKEND_LOCAL") or os.getenv("REDIS_BACKEND")
+parsed = urlparse(redis_url)
+
+# Slot-locking Redis client
 redis_client = Redis(
     host=parsed.hostname,
     port=parsed.port,
     db=parsed.path.strip("/"),
 )
 
-# ------------------------------------------------------------
-# BACKEND CLIENT (abort flags, progress counters) - always DB 0
-# ------------------------------------------------------------
+# Abort + global progress flags always live in DB 0
 backend_client = Redis(
     host=parsed.hostname,
     port=parsed.port,
     db=0,
 )
 
+# ------------------------------------------------------------
+# CONFIG
+# ------------------------------------------------------------
 AUDIO_TIMEOUT = int(os.getenv("AUDIO_TIMEOUT_SECONDS", "300"))
 AUDIO_VOICE = os.getenv("AUDIO_VOICE", "SinJi")
 AUDIO_RATE = int(os.getenv("AUDIO_RATE", "200"))
-HOST_PATH = os.getenv("HOST_PATH", "/app/output")
-AUDIO_SLOTS = int(os.getenv("AUDIO_SLOTS", "1"))
 
+HOST_PATH = os.getenv("HOST_PATH", "/app/output")
 CONTAINER_PREFIX = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "/app/output")
+AUDIO_SLOTS = int(os.getenv("AUDIO_SLOTS", "1"))
+
+
+# ============================================================
+# CELERY TASK
+# ============================================================
 
 @celery_app.task(bind=True, queue="audio", ignore_result=True)
+@logcall
 def generate_audio(
-    self, book_id, volume_name, chapter_number, chapter_title, chapter_text
+    self, book_id, volume_name, chapter_number, chapter_title, chapter_path
 ):
-    log(f"[AUDIO] CH{chapter_number}: START task → raw_input={chapter_text}")
+    """
+    chapter_path: absolute container path to the chapter text file.
+ """ + + log(f"[AUDIO]({HOST}) CH{chapter_number}: START → {chapter_title}") - # Abort early + # ------------------------------------------------------------ + # ABORT CHECK + # ------------------------------------------------------------ if abort_requested(book_id, backend_client): inc_audio_skipped(book_id) - log(f"[AUDIO] ABORT detected → skip CH{chapter_number}") + log(f"[AUDIO]({HOST}) ABORT detected → skip CH{chapter_number}") return - # ============================================================ - # ACQUIRE AUDIO SLOT - # ============================================================ + # ------------------------------------------------------------ + # ACQUIRE SLOT + # ------------------------------------------------------------ slot_key = None ttl = AUDIO_TIMEOUT + 15 @@ -72,11 +94,13 @@ def generate_audio( log(f"[AUDIO] CH{chapter_number}: Acquired slot {i}/{AUDIO_SLOTS}") break + # Need to wait if slot_key is None: - log(f"[AUDIO] CH{chapter_number}: All slots busy → waiting...") + log(f"[AUDIO] CH{chapter_number}: All slots busy → waiting…") start_wait = time.time() while slot_key is None: + # Try all slots again for i in range(1, AUDIO_SLOTS + 1): key = f"audio_slot:{i}" if redis_client.set(key, "1", nx=True, ex=ttl): @@ -84,32 +108,32 @@ def generate_audio( log(f"[AUDIO] CH{chapter_number}: Slot acquired after wait") break - if slot_key: - break + # If still no slot + if not slot_key: + if abort_requested(book_id, backend_client): + log(f"[AUDIO] ABORT while waiting → skip CH{chapter_number}") + inc_audio_skipped(book_id) + return - if abort_requested(book_id, backend_client): - log(f"[AUDIO] ABORT while waiting → skip CH{chapter_number}") - return + if time.time() - start_wait > ttl: + log(f"[AUDIO] CH{chapter_number}: Wait timeout → abort audio") + inc_audio_skipped(book_id) + return - if time.time() - start_wait > ttl: - log(f"[AUDIO] CH{chapter_number}: Slot wait timeout → aborting audio") - return + time.sleep(0.25) - time.sleep(0.25) - - # ============================================================ + # ------------------------------------------------------------ # PATH NORMALISATION - # ============================================================ - - container_path = chapter_text + # ------------------------------------------------------------ + container_path = chapter_path - # Fix 1 — container_path kan None zijn → abort zonder crash if not container_path: - log(f"[AUDIO] CH{chapter_number}: FATAL — no input path provided") + log(f"[AUDIO] CH{chapter_number}: ERROR — no input file path provided") redis_client.delete(slot_key) + inc_audio_skipped(book_id) return - # Fix 2 — veilige startswith + # Strip container prefix so that host path is resolvable if CONTAINER_PREFIX and container_path.startswith(CONTAINER_PREFIX): relative_path = container_path[len(CONTAINER_PREFIX) :].lstrip("/") else: @@ -118,35 +142,35 @@ def generate_audio( parts = relative_path.split("/") if len(parts) < 3: log( - f"[AUDIO] CH{chapter_number}: FATAL — cannot parse book/volume from {relative_path}" + f"[AUDIO] CH{chapter_number}: ERROR — cannot parse book/volume from {relative_path}" ) redis_client.delete(slot_key) + inc_audio_skipped(book_id) return - book_from_path = parts[0] - volume_from_path = parts[1] - + # book_from_path = parts[0] # volume_name passed explicitly anyway + # volume_from_path = parts[1] host_path = os.path.join(HOST_PATH, relative_path) - # ============================================================ - # OUTPUT PREP - # 
============================================================ - - base_dir = os.path.join(HOST_PATH, book_from_path, volume_from_path, "Audio") + # ------------------------------------------------------------ + # OUTPUT DIRECTORY + # ------------------------------------------------------------ + base_dir = os.path.join(HOST_PATH, parts[0], parts[1], "Audio") os.makedirs(base_dir, exist_ok=True) safe_num = f"{chapter_number:04d}" audio_file = os.path.join(base_dir, f"{safe_num}.m4b") + # Skip if audio already exists if os.path.exists(audio_file): - log(f"[AUDIO] Skip CH{chapter_number} → already exists") + log(f"[AUDIO] CH{chapter_number}: Already exists → skip") redis_client.delete(slot_key) + inc_audio_skipped(book_id) return - # ============================================================ - # BUILD CMD - # ============================================================ - + # ------------------------------------------------------------ + # BUILD TTS COMMAND + # ------------------------------------------------------------ cmd = ( f"say --voice={AUDIO_VOICE} " f"--input-file='{host_path}' " @@ -157,30 +181,34 @@ def generate_audio( f"--data-format=aac" ) - log(f"[AUDIO] CH{chapter_number}: CMD = {cmd}") + log(f"[AUDIO]({HOST}) CH{chapter_number} → output: {audio_file}") - # ============================================================ - # RUN TTS - # ============================================================ + # ------------------------------------------------------------ + # EXECUTE + # ------------------------------------------------------------ try: subprocess.run(cmd, shell=True, check=True, timeout=AUDIO_TIMEOUT) + # NEW — repository façade inc_audio_done(book_id) - log(f"[AUDIO] CH{chapter_number}: Completed") + log(f"[AUDIO]({HOST}) CH{chapter_number}: Completed") except subprocess.TimeoutExpired: - log(f"[AUDIO] CH{chapter_number}: TIMEOUT → remove incomplete file") + log(f"[AUDIO]({HOST}) CH{chapter_number}: TIMEOUT → removing file") if os.path.exists(audio_file): try: os.remove(audio_file) except Exception: pass + inc_audio_skipped(book_id) except subprocess.CalledProcessError as e: log(f"[AUDIO] CH{chapter_number}: ERROR during say → {e}") + inc_audio_skipped(book_id) except Exception as e: log(f"[AUDIO] CH{chapter_number}: UNEXPECTED ERROR → {e}") + inc_audio_skipped(book_id) finally: if slot_key: redis_client.delete(slot_key)
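For context, the audio_slot keys above form a small counting semaphore built on Redis SET NX EX: each numbered key is one slot, and the TTL guarantees that a slot held by a crashed worker frees itself. A standalone sketch of the same pattern (slot count, TTL, and connection are illustrative, not this project's config):

    import time
    from typing import Optional

    from redis import Redis

    r = Redis()  # assumes a reachable local Redis


    def acquire_slot(slots: int = 2, ttl: int = 315) -> Optional[str]:
        # SET key 1 NX EX ttl is atomic: at most one worker wins each slot,
        # and the slot auto-expires after ttl seconds.
        for i in range(1, slots + 1):
            key = f"audio_slot:{i}"
            if r.set(key, "1", nx=True, ex=ttl):
                return key
        return None


    slot = None
    deadline = time.time() + 315
    while slot is None and time.time() < deadline:
        slot = acquire_slot()
        if slot is None:
            time.sleep(0.25)  # same polling cadence as the task

    if slot:
        try:
            ...  # run the TTS subprocess here
        finally:
            r.delete(slot)  # always release, mirroring the task's finally block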
diff --git a/bookscraper/scraper/tasks/controller_tasks.py b/bookscraper/scraper/tasks/controller_tasks.py index 0c1419d..0992dfa 100644 --- a/bookscraper/scraper/tasks/controller_tasks.py +++ b/bookscraper/scraper/tasks/controller_tasks.py @@ -10,18 +10,20 @@ from celery_app import celery_app from logbus.publisher import log from scraper.download_controller import DownloadController -from scraper.progress import ( - set_total, -) + from urllib.parse import urlparse + +from scraper.logger_decorators import logcall import redis import os from scraper.abort import abort_requested +from db.repository import set_chapters_total print(">>> [IMPORT] controller_tasks.py loaded") @celery_app.task(bind=True, queue="controller", ignore_result=False) +@logcall def launch_downloads(self, book_id: str, scrape_result: dict): """ Launch the entire pipeline (download → parse → save), @@ -62,7 +64,8 @@ def launch_downloads(self, book_id: str, scrape_result: dict): # ------------------------------------------------------------ # INIT PROGRESS # ------------------------------------------------------------ - set_total(book_id, total) + set_chapters_total(book_id, total) + log(f"[CTRL] Progress initialized for {book_id}: total={total}") # ------------------------------------------------------------ diff --git a/bookscraper/scraper/tasks/download_tasks.py b/bookscraper/scraper/tasks/download_tasks.py index d3f3785..fdaaad9 100644 --- a/bookscraper/scraper/tasks/download_tasks.py +++ b/bookscraper/scraper/tasks/download_tasks.py @@ -1,26 +1,21 @@ # ============================================================ # File: scraper/tasks/download_tasks.py -# Purpose: Download chapter HTML with global concurrency, -# retry/backoff logic, 429 support, and abort-awareness. -# -# Logging: -# - timestamp + book_id in message -# - logbus.publisher → console -# - ui_log.push_ui → Redis GUI # ============================================================ from celery_app import celery_app -from scraper.utils import get_save_path +from scraper.utils.utils import get_save_path +from scraper.abort import abort_requested, chapter_started, mark_chapter_started -from scraper.progress import ( - inc_completed, - inc_chapter_done, - inc_chapter_download_skipped, +# Repository façade — correct imports only +from db.repository import ( + set_status, + inc_download_done, + inc_download_skipped, ) -from db.repository import inc_downloaded, set_status + from logbus.publisher import log from scraper.ui_log import push_ui +from scraper.logger_decorators import logcall import requests import redis @@ -90,81 +85,74 @@ def release_global_slot(): # ============================================================ -# CELERY TASK — NEW SIGNATURE WITH chapter_dict + book_meta +# CELERY TASK — Unified payload v3 # ============================================================ @celery_app.task(bind=True, queue="download", ignore_result=False) -def download_chapter(self, book_id: str, chapter_dict: dict, book_meta: dict): +@logcall +def download_chapter(self, payload: dict): """ - New unified chapter model: - chapter_dict = { - "num": int, - "url": str, - "title": str, - "volume_path": str - } - - book_meta is propagated through the pipeline for parse/save.
+ Payload: + { + "book_id": str, + "chapter": { "num", "url", "title", "volume_path" }, + "book_meta": dict, + "html": None | str, + "parsed": None | str, + "skipped": bool, + "path": None | str + } """ - chapter_num = chapter_dict.get("num") - chapter_url = chapter_dict.get("url") - chapter_title = chapter_dict.get("title") or f"Chapter {chapter_num}" - volume_path = chapter_dict.get("volume_path") + if not payload: + raise ValueError("download_chapter received empty payload") + + book_id = payload["book_id"] + chapter = payload["chapter"] + book_meta = payload.get("book_meta") or {} + + chapter_num = chapter["num"] + chapter_url = chapter["url"] + chapter_title = chapter.get("title") or f"Chapter {chapter_num}" + volume_path = chapter["volume_path"] + + # STATUS UPDATE + set_status(book_id, "downloading") - # ----------------------------------------------------------- - # ABORT BEFORE START - # ----------------------------------------------------------- + # ABORT CHECK if abort_requested(book_id) and not chapter_started(book_id, chapter_num): - msg = f"[ABORT] Skip chapter {chapter_num} (abort active, not started)" - log_msg(book_id, msg) - inc_chapter_download_skipped(book_id) - return { - "book_id": book_id, - "chapter": chapter_dict, - "html": None, - "skipped": True, - "path": None, - "abort": True, - "book_meta": book_meta, - } - - # Mark chapter as started + log_msg(book_id, f"[ABORT] Skip chapter {chapter_num}") + + inc_download_skipped(book_id) + + payload["html"] = None + payload["skipped"] = True + payload["path"] = None + return payload + mark_chapter_started(book_id, chapter_num) - # ----------------------------------------------------------- - # SKIP IF FILE ALREADY EXISTS - # ----------------------------------------------------------- + # SKIP IF FILE ALREADY SAVED save_path = get_save_path(chapter_num, volume_path) - if os.path.exists(save_path): - log_msg(book_id, f"[DL] SKIP {chapter_num} ({chapter_title}) → {save_path}") - return { - "book_id": book_id, - "chapter": chapter_dict, - "html": None, - "skipped": True, - "path": save_path, - "book_meta": book_meta, - } - - # ----------------------------------------------------------- - # GLOBAL + SYNC DELAY - # ----------------------------------------------------------- + log_msg(book_id, f"[DL] SKIP {chapter_num} → {save_path}") + + inc_download_skipped(book_id) + + payload["html"] = None + payload["skipped"] = True + payload["path"] = save_path + return payload + + # GLOBAL DELAY + SLOT if GLOBAL_DELAY > 0: time.sleep(GLOBAL_DELAY) wait_for_global_delay() acquire_global_slot(MAX_CONCURRENCY) - # log_msg(book_id, f"[DL] ACQUIRED SLOT for chapter {chapter_num}") - # ----------------------------------------------------------- # HTTP DOWNLOAD - # ----------------------------------------------------------- try: - log_msg( - book_id, - f"[DL] Downloading {chapter_num} ({chapter_title}): {chapter_url}", - ) + log_msg(book_id, f"[DL] Downloading {chapter_num} ({chapter_title})") resp = requests.get( chapter_url, @@ -178,39 +166,27 @@ def download_chapter(self, book_id: str, chapter_dict: dict, book_meta: dict): log_msg(book_id, f"[DL] OK {chapter_num}: {len(html)} bytes") - return { - "book_id": book_id, - "chapter": chapter_dict, - "html": html, - "skipped": False, - "path": save_path, - "book_meta": book_meta, - } + inc_download_done(book_id) + + # --- attach results --- + payload["html"] = html + payload["skipped"] = False + payload["path"] = save_path + return payload except Exception as exc: attempt = self.request.retries
delay = BASE_DELAY * (BACKOFF**attempt) - # Specific 429 handler if getattr(getattr(exc, "response", None), "status_code", None) == 429: - log_msg( - book_id, - f"[DL] 429 {chapter_num} → WAIT {DELAY_429}s " - f"(attempt {attempt}/{MAX_RETRIES})", - ) + log_msg(book_id, f"[DL] 429 → WAIT {DELAY_429}s") time.sleep(DELAY_429) set_global_delay() raise self.retry(exc=exc, countdown=0, max_retries=MAX_RETRIES) - # Normal retry - log_msg( - book_id, - f"[DL] ERROR {chapter_num}: {exc} → retry in {delay}s " - f"(attempt {attempt}/{MAX_RETRIES})", - ) + log_msg(book_id, f"[DL] ERROR {chapter_num}: {exc} → retry {delay}s") raise self.retry(exc=exc, countdown=delay, max_retries=MAX_RETRIES) finally: set_global_delay() release_global_slot() - # log_msg(book_id, f"[DL] RELEASED SLOT for chapter {chapter_num}")
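To make the new contract concrete: a sketch of the payload as it leaves download_chapter for a successfully fetched chapter, and of launching one chapter chain via build_chapter_pipeline from the pipeline.py diff further down. All identifiers and values here are illustrative, and apply_async() needs the Celery broker running:

    from scraper.tasks.pipeline import build_chapter_pipeline

    # Hypothetical chapter taken from a scrape result.
    chapter = {"num": 12, "url": "https://example.com/12.html",
               "title": "Chapter 12", "volume_path": "/app/output/book/Volume 1"}
    book_meta = {"title": "Some Book", "author": "Someone",
                 "description": "...", "book_url": "https://example.com/book"}

    # What download_chapter hands to parse_chapter on success:
    payload = {
        "book_id": "book-123",
        "chapter": chapter,
        "book_meta": book_meta,
        "html": "<html>...</html>",  # raw page body
        "parsed": None,              # filled in later by parse_chapter (a str)
        "skipped": False,
        "path": "/app/output/book/Volume 1/0012.txt",
    }

    # Launching the three-step chain (download → parse → save) for one chapter:
    build_chapter_pipeline("book-123", chapter, book_meta).apply_async()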
diff --git a/bookscraper/scraper/tasks/parse_tasks.py b/bookscraper/scraper/tasks/parse_tasks.py index d11eaf2..7fabd15 100644 --- a/bookscraper/scraper/tasks/parse_tasks.py +++ b/bookscraper/scraper/tasks/parse_tasks.py @@ -1,33 +1,31 @@ -# ========================================================= +# ============================================================ # File: scraper/tasks/parse_tasks.py # Purpose: Parse downloaded HTML into clean chapter text. -# Enhanced version: Piaotia H1→content extractor + clean pipeline -# NO HARDCODED REPLACEMENTS — everything comes from replacement files -# ========================================================= +# Enhanced Piaotia extractor + selector fallback + clean pipeline. +# Compatible with payload pipeline v3. +# ============================================================ from celery_app import celery_app -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, NavigableString, Comment -from scraper.utils import clean_text, load_all_replacements -from scraper.tasks.download_tasks import log_msg # unified logger +from scraper.tasks.download_tasks import log_msg +from scraper.utils.utils import clean_text, load_all_replacements +from scraper.logger_decorators import logcall +from db.repository import inc_parsed_done -from bs4 import NavigableString, Comment -print(">>> [IMPORT] parse_tasks.py loaded (enhanced parser)") +print(">>> [IMPORT] parse_tasks.py loaded (RESTORED + payload v3)") +# ============================================================ +# PIAOTIA ADVANCED CONTENT EXTRACTOR (unchanged from original) +# ============================================================ def extract_piaotia_content(soup): - """ - Extract clean chapter content from Piaotia pages. - Start after the table following <h1>
. - End before nav/ads/footer/copyright. - """ - h1 = soup.find("h1") if not h1: return None - # -------- Find first table after
<h1>
-------- + # Find first table after
<h1>
table = None for sib in h1.next_siblings: if getattr(sib, "name", None) == "table": @@ -39,44 +37,41 @@ def extract_piaotia_content(soup): parts = [] - # -------- Iterate after table -------- for sib in table.next_siblings: - name = getattr(sib, "name", None) text = None + if hasattr(sib, "get_text"): text = sib.get_text(strip=True) - # === STOP CONDITIONS === + # STOP CONDITIONS - # Comments like <!-- 翻页 --> + # <!-- 翻页 --> if isinstance(sib, Comment) and ("翻页" in sib): break - # Explicit footer blocks + # explicit footer blocks if name == "div": sid = sib.get("id", "") cls = sib.get("class", []) if sid in ("thumb", "tags", "tips", "Commenddiv", "feit2"): break - # Copyright block — strongest indicator + # copyright block if text and ("重要声明" in text or "Copyright" in text): break - # Navigation or 推荐阅读 + # navigation blocks if text and (text.startswith(("推荐阅读", "目录", "目 录"))): break - # Skip scripts, ads, centers if name in ("script", "style"): continue - # Skip JS containers like <center>
if name == "center": continue - # === ACCUMULATE TEXT === + # ACCUMULATE if isinstance(sib, NavigableString): s = sib.strip() if s: @@ -90,36 +85,42 @@ def extract_piaotia_content(soup): return "\n".join(parts).strip() +# ============================================================ +# PARSE TASK — PAYLOAD PIPELINE (CORRECT v3 FORMAT) +# ============================================================ @celery_app.task(bind=True, queue="parse", ignore_result=False) -def parse_chapter(self, download_result: dict): - """ - New signature under chapter_dict pipeline: - - receives ONLY the output dict from download_chapter - - book_meta is inside download_result["book_meta"] - - chapter_dict is inside download_result["chapter"] - """ - - book_id = download_result.get("book_id", "NOBOOK") - chapter_dict = download_result.get("chapter") or {} - book_meta = download_result.get("book_meta") or {} - chapter_title = chapter_dict.get("title") - chapter_num = chapter_dict.get("num") - chapter_url = chapter_dict.get("url") - html = download_result.get("html") - # ------------------------------------------------------------ +@logcall +def parse_chapter(self, payload: dict): + + if not payload: + return {"skipped": True, "reason": "empty_payload"} + + book_id = payload["book_id"] + chapter = payload["chapter"] + book_meta = payload.get("book_meta") or {} + + num = chapter.get("num") + title = chapter.get("title") or f"Chapter {num}" + html = payload.get("html") + # SKIPPED DOWNLOAD → SKIP PARSE - # ------------------------------------------------------------ - if download_result.get("skipped"): - log_msg(book_id, f"[PARSE] SKIP chapter {chapter_num} (download skipped)") - return download_result # already has chapter + book_meta + skipped + if payload.get("skipped"): + log_msg(book_id, f"[PARSE] SKIP chapter {num} (download skipped)") + return payload - log_msg(book_id, f"[PARSE] Parsing chapter {chapter_num}") + if not html: + log_msg(book_id, f"[PARSE] Missing HTML for chapter {num} → SKIP") + payload["parsed"] = None + payload["skipped"] = True + return payload + + log_msg(book_id, f"[PARSE] Parsing chapter {num}") soup = BeautifulSoup(html, "lxml") - # ------------------------------------------------------------ - # STRICT SELECTORS (direct content blocks) - # ------------------------------------------------------------ + # ============================================================ + # STRICT SELECTORS + # ============================================================ selectors = [ "#content", "div#content", @@ -141,63 +142,32 @@ def parse_chapter(self, download_result: dict): raw = None - # --- STRICT SELECTOR FAILED → Try Piaotia extractor --- + # --- STRICT SELECTOR FAILED → Piaotia extractor --- if node is None: raw = extract_piaotia_content(soup) + else: + raw = node.get_text(separator="\n") - # # ------------------------------------------------------------ - # # PIAOTIA FALLBACK: - # # Extract content between
<h1>
and the "bottomlink" block. - # # ------------------------------------------------------------ - # raw = None - # if node is None: - # h1 = soup.find("h1") - # if h1: - # content_parts = [] - # for sib in h1.next_siblings: - - # sib_class = getattr(sib, "get", lambda *_: None)("class") - # if sib_class and ( - # "bottomlink" in sib_class or sib_class == "bottomlink" - # ): - # break - - # if getattr(sib, "name", None) in ["script", "style", "center"]: - # continue - - # if hasattr(sib, "get_text"): - # content_parts.append(sib.get_text(separator="\n")) - # else: - # content_parts.append(str(sib)) - - # raw = "\n".join(content_parts) - - # ------------------------------------------------------------ # FINAL FALLBACK - # ------------------------------------------------------------ if raw is None: - if node: - raw = node.get_text(separator="\n") - else: - for tag in soup(["script", "style", "noscript"]): - tag.decompose() - raw = soup.get_text(separator="\n") + for tag in soup(["script", "style", "noscript"]): + tag.decompose() + raw = soup.get_text(separator="\n") - # ------------------------------------------------------------ - # MULTIPASS CLEANING via replacement files ONLY - # ------------------------------------------------------------ + # ============================================================ + # MULTIPASS CLEANING via replacement files + # ============================================================ REPL = load_all_replacements() text = raw for _ in range(5): text = clean_text(text, REPL) - # ------------------------------------------------------------ - # Collapse excessive empty lines - # ------------------------------------------------------------ + # ============================================================ + # Collapse double blank lines + # ============================================================ cleaned = [] prev_blank = False - for line in text.split("\n"): stripped = line.rstrip() if stripped == "": @@ -208,28 +178,31 @@ def parse_chapter(self, download_result: dict): else: prev_blank = False cleaned.append(stripped) + text = "\n".join(cleaned) - text = chapter_title + "\n" + text - # ------------------------------------------------------------ + text = f"{title}\n{text}" + + # ============================================================ # Add header to chapter 1 - # ------------------------------------------------------------ - if chapter_num == 1: - book_url = book_meta.get("book_url") or book_meta.get("url") or "UNKNOWN" + # ============================================================ + if num == 1: + book_url = book_meta.get("book_url") or "UNKNOWN" header = ( - f"{book_meta.get('title','')}\n" + f"{book_meta.get('title', '')}\n" f"Author: {book_meta.get('author','')}\n" f"Description:\n{book_meta.get('description','')}\n" f"Book URL: {book_url}\n" + "-" * 50 + "\n\n" ) text = header + text - log_msg(book_id, f"[PARSE] Parsed chapter {chapter_num}: {len(text)} chars") + log_msg(book_id, f"[PARSE] Parsed chapter {num}: {len(text)} chars") + + # ============================================================ + # PAYLOAD OUTPUT (v3) + # ============================================================ + payload["parsed"] = text + payload["skipped"] = False + + inc_parsed_done(book_id) - # NEW RETURN FORMAT: chapter_dict stays intact - return { - "book_id": book_id, - "chapter": chapter_dict, - "text": text, - "length": len(text), - "book_meta": book_meta, - } + return payload diff --git a/bookscraper/scraper/tasks/pipeline.py b/bookscraper/scraper/tasks/pipeline.py 
index 267af60..a42d827 100644 --- a/bookscraper/scraper/tasks/pipeline.py +++ b/bookscraper/scraper/tasks/pipeline.py @@ -1,16 +1,12 @@ # ========================================================= # File: scraper/tasks/pipeline.py # Purpose: -# Build Celery chains for chapter processing using chapter_dict. +# Build Celery chains for chapter processing using payload dict. # -# New Chain: -# download_chapter(book_id, chapter_dict, book_meta) -# → parse_chapter(download_result) -# → save_chapter(parsed_result) -# → update_progress(final_result, book_id) -# -# All subtasks pass through result dicts unchanged so the -# next stage receives the correct fields. +# Pipeline v3: +# download_chapter(payload) +# → parse_chapter(payload) +# → save_chapter(payload) # ========================================================= from celery import chain @@ -18,26 +14,28 @@ from celery import chain from scraper.tasks.download_tasks import download_chapter from scraper.tasks.parse_tasks import parse_chapter from scraper.tasks.save_tasks import save_chapter -from scraper.tasks.progress_tasks import update_progress +from scraper.logger_decorators import logcall -def build_chapter_pipeline( - book_id: str, - chapter_dict: dict, - book_meta: dict, -): - """ - Build a Celery chain for one chapter using chapter_dict. - download_chapter(book_id, chapter_dict, book_meta) - → parse_chapter(download_result) - → save_chapter(parsed_result) - → update_progress(result, book_id) +@logcall +def build_chapter_pipeline(book_id: str, chapter_dict: dict, book_meta: dict): """ + Payload model passed through entire pipeline. + """ + + payload = { + "book_id": book_id, + "chapter": chapter_dict, + "book_meta": book_meta, + "html": None, + "parsed": None, + "skipped": False, + "path": None, + } return chain( - download_chapter.s(book_id, chapter_dict, book_meta), + download_chapter.s(payload), parse_chapter.s(), save_chapter.s(), - update_progress.s(book_id), ) diff --git a/bookscraper/scraper/tasks/progress_tasks.py b/bookscraper/scraper/tasks/progress_tasks.py deleted file mode 100644 index 3752893..0000000 --- a/bookscraper/scraper/tasks/progress_tasks.py +++ /dev/null @@ -1,57 +0,0 @@ -# ============================================================ -# File: scraper/tasks/progress_tasks.py -# Purpose: Central progress updater for chapter pipelines. -# Updated for chapter_dict pipeline model. 
-# ============================================================ - -from celery_app import celery_app -from scraper.progress import inc_completed, inc_skipped, inc_failed -from logbus.publisher import log - -print(">>> [IMPORT] progress_tasks.py loaded") - - -@celery_app.task(bind=False, name="progress.update", queue="controller") -def update_progress(result: dict, book_id: str): - """ - Central progress logic: - - result: output of save_chapter - - book_id: explicitly passed by pipeline - - IMPORTANT: - - save_chapter already updates counters for skipped & normal chapters - - progress.update MUST NOT double-increment - """ - - ch = result.get("chapter") or {} - chapter_num = ch.get("num") - - skipped = result.get("skipped", False) - failed = result.get("failed", False) - - # ------------------------------------------------------------ - # FAILED CASE - # ------------------------------------------------------------ - if failed: - inc_failed(book_id) - log(f"[PROG] FAILED chapter {chapter_num}") - return result - - # ------------------------------------------------------------ - # SKIPPED CASE - # ------------------------------------------------------------ - if skipped: - # save_chapter already did: - # inc_skipped(book_id) - log(f"[PROG] SKIPPED chapter {chapter_num}") - return result - - # ------------------------------------------------------------ - # NORMAL COMPLETION - # ------------------------------------------------------------ - # save_chapter did NOT increment completed for skipped cases - # but DID inc_completed(book_id) for normal cases. - # update_progress should NOT double increment, so only log here. - log(f"[PROG] DONE chapter {chapter_num}") - - return result diff --git a/bookscraper/scraper/tasks/save_tasks.py b/bookscraper/scraper/tasks/save_tasks.py index 0999676..774920d 100644 --- a/bookscraper/scraper/tasks/save_tasks.py +++ b/bookscraper/scraper/tasks/save_tasks.py @@ -1,139 +1,83 @@ # ============================================================ -# File: scraper/tasks/save_tasks.py -# Purpose: Save parsed chapter text to disk + trigger audio. -# Updated for chapter_dict + book_meta pipeline model. 
+# File: scraper/tasks/save_tasks.py (RESTORED AUDIO LOGIC) # ============================================================ print(">>> [IMPORT] save_tasks.py loaded") from celery import shared_task import os -from scraper.utils import get_save_path -from scraper.tasks.download_tasks import log_msg # unified logger -from scraper.progress import ( - inc_completed, - inc_chapter_done, - inc_chapter_download_skipped, -) + +from logbus.publisher import log +from scraper.logger_decorators import logcall +from scraper.utils.utils import get_save_path +from scraper.tasks.download_tasks import log_msg from scraper.tasks.audio_tasks import generate_audio +from db.repository import inc_download_done, inc_download_skipped + @shared_task(bind=True, queue="save", ignore_result=False) -def save_chapter(self, parsed: dict): - """ - New pipeline model: - parsed = { - "book_id": str, - "chapter": chapter_dict, - "text": str, - "length": int, - "book_meta": dict, - "skipped": bool, - "path": optional str (if skipped) - } - """ - - book_id = parsed.get("book_id", "NOBOOK") - chapter_dict = parsed.get("chapter") or {} - book_meta = parsed.get("book_meta") or {} - - chapter_num = chapter_dict.get("num") - chapter_title = chapter_dict.get("title") or f"Chapter {chapter_num}" - volume_path = chapter_dict.get("volume_path") - - # ------------------------------------------------------------ - # VALIDATION - # ------------------------------------------------------------ - if chapter_num is None or volume_path is None: - raise ValueError("Invalid parsed payload: chapter_dict missing fields.") - - # ------------------------------------------------------------ - # SKIPPED CASE - # ------------------------------------------------------------ - if parsed.get("skipped"): - path = parsed.get("path", None) - log_msg(book_id, f"[SAVE] SKIP chapter {chapter_num} → {path}") - - inc_chapter_download_skipped(book_id) - - volume_name = os.path.basename(volume_path.rstrip("/")) - - # Queue audio only if a valid file exists +@logcall +def save_chapter(self, payload: dict): + + if not payload: + log("[SAVE] ERROR: payload is None") + return {"error": True} + + book_id = payload["book_id"] + chapter = payload["chapter"] + parsed = payload.get("parsed") + path = payload.get("path") + skipped = payload.get("skipped") + + num = chapter["num"] + title = chapter.get("title") or f"Chapter {num}" + volume = chapter.get("volume_path") + volume_name = os.path.basename(volume.rstrip("/")) + + # ============================================================ + # SKIPPED CASE (restore old behavior) + # ============================================================ + if skipped or not parsed: + log_msg(book_id, f"[SAVE] SKIP chapter {num}") + inc_download_skipped(book_id) + + # Restore old behavior: + # If file already exists, STILL trigger audio. 
if path and os.path.exists(path): + log_msg(book_id, f"[AUDIO] Queueing audio for SKIPPED chapter {num}") try: - generate_audio.delay( - book_id, - volume_name, - chapter_num, - chapter_title, - path, - ) - log_msg( - book_id, - f"[AUDIO] Task queued (SKIPPED) for chapter {chapter_num} in {volume_name}", - ) - except Exception as audio_exc: - log_msg( - book_id, - f"[AUDIO] ERROR queueing (SKIPPED) chapter {chapter_num}: {audio_exc}", - ) - - return { - "book_id": book_id, - "chapter": chapter_dict, - "path": path, - "skipped": True, - "book_meta": book_meta, - } - - # ------------------------------------------------------------ - # NORMAL SAVE CASE - # ------------------------------------------------------------ - try: - text = parsed.get("text", "") + generate_audio.delay(book_id, volume_name, num, title, path) + except Exception as exc: + log_msg(book_id, f"[AUDIO] ERROR queueing skipped audio: {exc}") - # Ensure volume folder exists - os.makedirs(volume_path, exist_ok=True) + return payload - # Build final chapter file path - path = get_save_path(chapter_num, volume_path) + # ============================================================ + # NORMAL SAVE CASE + # ============================================================ + try: + os.makedirs(volume, exist_ok=True) + save_path = get_save_path(num, volume) - # Write chapter text to file - with open(path, "w", encoding="utf-8") as f: - f.write(text) + with open(save_path, "w", encoding="utf-8") as f: + f.write(parsed) - log_msg(book_id, f"[SAVE] Saved chapter {chapter_num} → {path}") - inc_chapter_done(book_id) - inc_completed(book_id) + log_msg(book_id, f"[SAVE] Saved chapter {num} → {save_path}") - # Determine volume name - volume_name = os.path.basename(volume_path.rstrip("/")) + inc_download_done(book_id) - # Queue audio task + # Restore old behavior → ALWAYS queue audio try: - generate_audio.delay( - book_id, - volume_name, - chapter_num, - chapter_title, - path, - ) - log_msg( - book_id, - f"[AUDIO] Task queued for chapter {chapter_num} in {volume_name}", - ) - except Exception as audio_exc: - log_msg( - book_id, f"[AUDIO] ERROR queueing chapter {chapter_num}: {audio_exc}" - ) - - return { - "book_id": book_id, - "chapter": chapter_dict, - "path": path, - "book_meta": book_meta, - } + generate_audio.delay(book_id, volume_name, num, title, save_path) + log_msg(book_id, f"[AUDIO] Task queued for chapter {num}") + except Exception as exc: + log_msg(book_id, f"[AUDIO] ERROR queueing chapter {num}: {exc}") + + payload["path"] = save_path + payload["skipped"] = False + return payload except Exception as exc: - log_msg(book_id, f"[SAVE] ERROR saving chapter {chapter_num}: {exc}") + log_msg(book_id, f"[SAVE] ERROR saving chapter {num}: {exc}") raise diff --git a/bookscraper/scraper/tasks/scraping.py b/bookscraper/scraper/tasks/scraping.py index 0694089..64b26ba 100644 --- a/bookscraper/scraper/tasks/scraping.py +++ b/bookscraper/scraper/tasks/scraping.py @@ -9,6 +9,7 @@ from logbus.publisher import log import os import redis +from scraper.logger_decorators import logcall from scraper.sites import BookSite from scraper.book_scraper import BookScraper from scraper.abort import clear_abort # no circular deps diff --git a/bookscraper/templates/components/book_card.html b/bookscraper/scraper/utils/__init__.py similarity index 100% rename from bookscraper/templates/components/book_card.html rename to bookscraper/scraper/utils/__init__.py diff --git a/bookscraper/scraper/utils/state_sync.py b/bookscraper/scraper/utils/state_sync.py new file mode 
100644 index 0000000..fe8a7a3 --- /dev/null +++ b/bookscraper/scraper/utils/state_sync.py @@ -0,0 +1,141 @@ +# ============================================================ +# File: scraper/utils/state_sync.py +# Purpose: +# State inspection + optional sync logic for book progress. +# This version provides: +# • inspect_books_state() → NO writes, just a dry-run +# • sync_books_from_redis() → NOT USED YET +# ============================================================ + +import os +import redis +from db.db import get_db + + +def inspect_books_state(): + """ + Reads all books from SQLite and fetches Redis progress, + but performs NO writes. Only shows: + - sqlite row + - redis state + - merged result (dry-run) + + Returns a list of inspection dicts. + """ + r = redis.Redis.from_url(os.getenv("REDIS_BROKER")) + db = get_db() + cur = db.cursor() + + cur.execute("SELECT * FROM books") + rows = cur.fetchall() + + results = [] + + for row in rows: + book_id = row["book_id"] + sqlite_row = dict(row) + + # Read redis state + redis_key = f"book:{book_id}:state" + progress = r.hgetall(redis_key) + + if progress: + decoded = {k.decode(): v.decode() for k, v in progress.items()} + else: + decoded = {} + + # Determine dry-run merged result + merged = sqlite_row.copy() + + if decoded: + merged["downloaded"] = int( + decoded.get("download_done", merged.get("downloaded", 0)) + ) + merged["parsed"] = int(decoded.get("parsed_done", merged.get("parsed", 0))) + merged["audio_done"] = int( + decoded.get("audio_done", merged.get("audio_done", 0)) + ) + merged["chapters_total"] = int( + decoded.get("chapters_total", merged.get("chapters_total", 0)) + ) + merged["status"] = decoded.get("status", merged.get("status", "unknown")) + + results.append( + { + "book_id": book_id, + "sqlite": sqlite_row, + "redis": decoded, + "would_merge_to": merged, + } + ) + + return results + + +def sync_books_from_redis(): + """ + Reads all books from SQLite, fetches Redis progress, + and updates SQLite rows accordingly. + + Returns a list of { + "book_id": ..., + "before": ..., + "redis": ..., + "after": ... + } + """ + r = redis.Redis.from_url(os.getenv("REDIS_BROKER")) + db = get_db() + cur = db.cursor() + + # Fetch all books + cur.execute("SELECT * FROM books") + rows = cur.fetchall() + + results = [] + + for row in rows: + book_id = row["book_id"] + before = dict(row) + + redis_key = f"book:{book_id}:state" + progress = r.hgetall(redis_key) + + if not progress: + results.append( + {"book_id": book_id, "before": before, "redis": {}, "after": before} + ) + continue + + # Decode Redis bytes → string dictionary + decoded = {k.decode(): v.decode() for k, v in progress.items()} + + # Extract counters + downloaded = int(decoded.get("download_done", 0)) + parsed = int(decoded.get("parsed_done", 0)) + audio_done = int(decoded.get("audio_done", 0)) + chapters_total = int(decoded.get("chapters_total", 0)) + + # Redis status wins + status = decoded.get("status", before["status"]) + + # Write back to SQLite + cur.execute( + """ + UPDATE books + SET downloaded = ?, parsed = ?, audio_done = ?, chapters_total = ?, status = ?, last_update = datetime('now') + WHERE book_id = ?
+ """, + (downloaded, parsed, audio_done, chapters_total, status, book_id), + ) + db.commit() + + # Fetch updated row + cur.execute("SELECT * FROM books WHERE book_id = ?", (book_id,)) + after = dict(cur.fetchone()) + + results.append( + {"book_id": book_id, "before": before, "redis": decoded, "after": after} + ) + + return results
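A sketch of how the dry-run inspection above might be consumed from a Flask debug endpoint. The route name and app wiring are illustrative, not part of this patch:

    from flask import Flask, jsonify
    from scraper.utils.state_sync import inspect_books_state

    app = Flask(__name__)

    @app.get("/debug/inspect-state")
    def debug_inspect_state():
        # Dry run only: each entry shows the sqlite row, the redis hash,
        # and what a merge would produce — nothing is written back.
        return jsonify(inspect_books_state())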
diff --git a/bookscraper/scraper/utils.py b/bookscraper/scraper/utils/utils.py similarity index 100% rename from bookscraper/scraper/utils.py rename to bookscraper/scraper/utils/utils.py diff --git a/bookscraper/static/covers/一剑朝天.jpg b/bookscraper/static/covers/一剑朝天.jpg deleted file mode 100644 index 8b1256e7d1f971f32da86697683fc97633fbffdc..0000000000000000000000000000000000000000 GIT binary patch (binary data omitted) diff --git a/bookscraper/static/covers/流氓高手.jpg b/bookscraper/static/covers/流氓高手.jpg deleted file mode 100644 index d269316fb17b85620e4e38dfde88c7b5286f6705..0000000000000000000000000000000000000000 GIT binary patch (binary data omitted) diff --git a/bookscraper/static/css/bookcard.css b/bookscraper/static/css/bookcard.css new file mode 100644 index 0000000..588716e --- /dev/null +++ b/bookscraper/static/css/bookcard.css @@ -0,0 +1,201 @@ +/* ======================================================================= + File: static/css/bookcard.css + Purpose: + All styling for registered book cards (book-card) + + status colors + start/abort buttons. + ======================================================================= */ + +/* ----------------------------------------------------------------------- + GRID WRAPPER FOR REGISTERED BOOKS + ----------------------------------------------------------------------- */ + +.registered-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(340px, 1fr)); + gap: 20px; + margin-top: 15px; +} + +/* ----------------------------------------------------------------------- + MAIN BOOK CARD + ----------------------------------------------------------------------- */ + +.book-card { + position: relative; + display: grid; + grid-template-columns: 90px auto; + gap: 15px; + + padding: 15px; + background: #fff; + border-radius: 10px; + border: 1px solid #e5e5e5; + box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); + + transition: border-color 0.25s ease, box-shadow 0.25s ease; +} + +/* ----------------------------------------------------------------------- + BOOK STATUS COLORS + ----------------------------------------------------------------------- */ + +.book-card.processing { + border-color: #007aff; + box-shadow: 0 0 6px rgba(0, 122, 255, 0.35); +} + +.book-card.downloading { + border-color: #ff9500; + box-shadow: 0 0 6px rgba(255, 149, 0, 0.35); +} + +.book-card.parsing { + border-color: #ffcc00; + box-shadow: 0 0 6px rgba(255, 204, 0, 0.35); +} + +.book-card.audio { + border-color: #e65100; + box-shadow: 0 0 6px rgba(230, 81, 0, 0.35); +} + +.book-card.completed { + border-color: #34c759; + box-shadow: 0 0 6px rgba(52, 199, 89, 0.35); +} + +.book-card.aborted { + border-color: #ff3b30; + box-shadow: 0 0 6px rgba(255, 59, 48, 0.35); +} + +/* ----------------------------------------------------------------------- + COVER IMAGE + ----------------------------------------------------------------------- */ + +.book-cover { + width: 90px; +} + +.book-img { + width: 90px; + height: 130px; + object-fit: cover; + border-radius: 4px; + background: #f4f4f4; +} + +.placeholder { + display: flex; + justify-content: center; + align-items: center; + color: #777; + font-size: 12px; +} + +/* ----------------------------------------------------------------------- + META INFORMATION + ----------------------------------------------------------------------- */ + +.book-meta { + display: flex; + flex-direction: column; + justify-content: space-between; +} + +.book-title { + font-size: 16px;
+
+.book-author {
+  font-size: 14px;
+  color: #444;
+  margin-bottom: 8px;
+}
+
+.book-created {
+  font-size: 12px;
+  color: #666;
+  margin-bottom: 10px;
+}
+
+/* -----------------------------------------------------------------------
+   ICON BUTTONS
+   ----------------------------------------------------------------------- */
+
+.icon-btn {
+  width: 34px;
+  height: 34px;
+  border: none;
+  border-radius: 8px;
+
+  display: flex;
+  justify-content: center;
+  align-items: center;
+
+  font-size: 16px;
+  color: #fff;
+  cursor: pointer;
+
+  transition: background 0.15s ease, transform 0.1s ease;
+}
+
+/* Start (green) */
+.icon-start {
+  background: #2d8a3d;
+}
+.icon-start:hover {
+  background: #226c30;
+  transform: scale(1.05);
+}
+
+.icon-start:disabled {
+  background: #9bbb9f !important;
+  cursor: not-allowed;
+  transform: none;
+  opacity: 0.5;
+}
+
+/* Abort (red) */
+.icon-abort {
+  background: #c62828;
+}
+.icon-abort:hover {
+  background: #a31f1f;
+  transform: scale(1.05);
+}
+
+.icon-abort:disabled {
+  background: #d8a0a0 !important;
+  cursor: not-allowed;
+  transform: none;
+  opacity: 0.5;
+}
+
+/* Hide button (gray) */
+.hide-form {
+  position: absolute;
+  top: 6px;
+  right: 6px;
+  margin: 0;
+}
+
+.icon-hide {
+  background: #777;
+}
+.icon-hide:hover {
+  background: #555;
+  transform: scale(1.05);
+}
+/* -----------------------------------------------------------------------
+   BOOK ACTIONS (right-aligned button row)
+   ----------------------------------------------------------------------- */
+
+.book-actions {
+  display: flex;
+  justify-content: flex-end; /* align buttons to the right */
+  gap: 10px; /* space between buttons */
+  margin-top: 12px;
+}
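A note on the status classes above: they are never toggled individually. refreshBookCards() in dashboard.js (later in this patch) stamps the backend status string straight onto the card, so the CSS class names must match the status values exactly. A minimal sketch of that contract, assuming the status strings shown in the selectors:

// Sketch (not part of the patch): map a backend status onto a card.
// Assumes status is one of "registered", "processing", "downloading",
// "parsing", "audio", "completed", "aborted" — matching the selectors above.
function applyStatusClass(card, status) {
  // One assignment replaces any previous status class,
  // mirroring refreshBookCards() in dashboard.js.
  card.className = `book-card ${status}`;
}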
diff --git a/bookscraper/static/css/dashboard.css b/bookscraper/static/css/dashboard.css
index c3946d1..e7aa67a 100644
--- a/bookscraper/static/css/dashboard.css
+++ b/bookscraper/static/css/dashboard.css
@@ -2,33 +2,28 @@
    File: static/css/dashboard.css
    Purpose:
      Clean full-width vertical dashboard layout with large log viewer.
+     Book-card CSS has moved to bookcard.css.
    ======================================================================= */
 
-/* ------------------------------
-   GENERAL PAGE LAYOUT
-   ------------------------------ */
+/* -----------------------------------------------------------------------
+   1) GENERAL PAGE LAYOUT
+   ----------------------------------------------------------------------- */
 
-/* Dashboard content should use full width */
 .dashboard-container {
   display: flex;
   flex-direction: column;
   width: 100%;
-  max-width: 1200px; /* voorkomt overflow rechts */
+  max-width: 1200px;
   margin: 20px auto;
   padding: 0 20px;
-  gap: 18px; /* kleiner dan 30px */
+  gap: 18px;
 }
 
-/* ------------------------------
-   SECTIONS (input, progress, logs)
-   ------------------------------ */
-
 .dashboard-section {
   background: #ffffff;
-  padding: 16px; /* kleiner */
+  padding: 16px;
   border-radius: 6px;
   border: 1px solid #ddd;
-  margin: 0; /* weg extra witruimte */
 }
 
 .page-title {
@@ -36,9 +31,9 @@
   margin-bottom: 15px;
 }
 
-/* ------------------------------
-   BOOK LIST (optional)
-   ------------------------------ */
+/* -----------------------------------------------------------------------
+   2) ACTIVE BOOK LIST (dashboard left panel)
+   ----------------------------------------------------------------------- */
 
 .book-list {
   display: flex;
@@ -52,7 +47,6 @@
   color: #777;
 }
 
-/* List item */
 .book-list-item {
   padding: 12px 16px;
   background: #f7f7f7;
@@ -73,7 +67,6 @@
   border-color: #1e88e5;
 }
 
-/* Title + metadata */
 .book-title {
   font-size: 16px;
   font-weight: 600;
@@ -84,13 +77,9 @@
   color: #555;
 }
 
-.meta-label {
-  font-weight: 600;
-}
-
-/* ------------------------------
-   PROGRESS BOX
-   ------------------------------ */
+/* -----------------------------------------------------------------------
+   3) PROGRESS BOX
+   ----------------------------------------------------------------------- */
 
 .progress-box {
   background: #fafafa;
@@ -141,14 +130,35 @@
   margin-top: 4px;
 }
 
-/* ------------------------------
-   LOG VIEWER — LARGE FULL-WIDTH
-   ------------------------------ */
+.book-abort-area {
+  margin-top: 10px;
+  text-align: right;
+}
+
+.abort-btn {
+  padding: 6px 12px;
+  border-radius: 4px;
+  border: 1px solid #cc0000;
+  background: #ff4444;
+  color: white;
+  font-size: 12px;
+  cursor: pointer;
+  transition: background 0.2s, border-color 0.2s;
+}
+
+.abort-btn:hover {
+  background: #ff2222;
+  border-color: #aa0000;
+}
+
+/* -----------------------------------------------------------------------
+   4) LOG VIEWER
+   ----------------------------------------------------------------------- */
 
 .log-viewer {
   width: 100%;
   max-width: 100%;
-  overflow: hidden; /* voorkom horizontaal uitsteken */
+  overflow: hidden;
 }
 
 .log-header {
@@ -171,11 +181,11 @@
 
   max-height: 75vh;
   overflow-y: auto;
-  overflow-x: hidden; /* voorkom dat de log naar rechts uitsteekt */
+  overflow-x: hidden;
 
-  background: #000000; /* Pure terminal black */
-  color: #00ff66; /* Matrix / retro green */
-  border: 1px solid #0f0; /* neon green frame */
+  background: #000;
+  color: #00ff66;
+  border: 1px solid #0f0;
   border-radius: 6px;
   padding: 12px;
 
@@ -183,48 +193,39 @@
   font-size: 13px;
   line-height: 1.35;
 
-  white-space: pre-wrap; /* wraps text */
-  word-break: break-word; /* lange links breken */
+  white-space: pre-wrap;
+  word-break: break-word;
 }
 
-/* Basestijl voor alle logregels */
+
 .log-line {
   white-space: pre-wrap;
   padding: 2px 0;
-
   font-family: "SF Mono", "Consolas", "Courier New", monospace;
 }
-
-/* Subklassen per logtype */
 .log-line.default {
-  color: #00ff66; /* groen */
+  color: #00ff66;
 }
-
 .log-line.dl {
-  color: #00ccff; /* cyan */
+  color: #00ccff;
 }
-
 .log-line.parse {
-  color: #ffaa00; /* oranje */
+  color: #ffaa00;
 }
-
 .log-line.save {
-  color: #ffdd33; /* geel */
+  color: #ffdd33;
 }
-
 .log-line.audio {
-  color: #ff66ff; /* paars */
+  color: #ff66ff;
}
-
 .log-line.ctrl {
-  color: #66aaff; /* lichtblauw */
+  color: #66aaff;
 }
-
 .log-line.error {
-  color: #ff3333; /* rood */
+  color: #ff3333;
 }
 
-/* ------------------------------
-   PLACEHOLDER
-   ------------------------------ */
+/* -----------------------------------------------------------------------
+   5) PLACEHOLDER / FOOTER
+   ----------------------------------------------------------------------- */
 
 .dashboard-placeholder {
   font-size: 15px;
@@ -232,6 +233,7 @@
   text-align: center;
   color: #777;
 }
+
 .footer {
   text-align: center;
   padding: 12px;
@@ -240,23 +242,56 @@
   font-size: 12px;
   border-top: 1px solid #ddd;
 }
-.book-abort-area {
-  margin-top: 10px;
-  text-align: right;
+/* -----------------------------
+   DROPDOWN NAVIGATION
+------------------------------ */
+
+/* Container for dropdown */
+.nav-dropdown {
+  position: relative;
 }
 
-.abort-btn {
-  padding: 6px 12px;
-  border-radius: 4px;
-  border: 1px solid #cc0000;
-  background: #ff4444;
-  color: white;
-  font-size: 12px;
+/* The clickable label ("Tools ▾") */
+.nav-dropdown > .nav-item {
   cursor: pointer;
-  transition: background 0.2s, border-color 0.2s;
 }
 
-.abort-btn:hover {
-  background: #ff2222;
-  border-color: #aa0000;
+/* Hide dropdown by default */
+.dropdown-menu {
+  display: none;
+  position: absolute;
+  top: 100%;
+  right: 0;
+  background: #fff; /* same background as the navbar */
+  border: 1px solid #ddd;
+  padding: 8px 0;
+  margin: 0;
+  list-style: none; /* remove list bullets */
+  border-radius: 4px;
+  min-width: 160px;
+  z-index: 1000;
+}
+
+/* Show dropdown when hovering over parent */
+.nav-dropdown:hover .dropdown-menu {
+  display: block;
+}
+
+/* Menu item styling */
+.dropdown-menu li {
+  padding: 0;
+  margin: 0;
+}
+
+.dropdown-menu li a {
+  display: block;
+  padding: 8px 16px;
+  white-space: nowrap;
+  color: #333;
+  text-decoration: none;
+}
+
+/* Hover state */
+.dropdown-menu li a:hover {
+  background: #f0f0f0;
 }
diff --git a/bookscraper/static/js/dashboard.js b/bookscraper/static/js/dashboard.js
index 1f78b29..35340c0 100644
--- a/bookscraper/static/js/dashboard.js
+++ b/bookscraper/static/js/dashboard.js
@@ -2,15 +2,15 @@
    File: static/js/dashboard.js
    Purpose:
      Dashboard interactions:
-     - select book
-     - refresh logs
-     - refresh progress
+     - Select active book
+     - Live logs & progress
+     - Bookcard AJAX start/abort
    NOTE:
-     $ / $$ / autoScroll komen uit helpers.js
+     updateLogs() is provided by log_view.js
    ======================================================================= */
 
 /* ---------------------------------------------------------
-   Simple fetch wrapper
+   Utility: Safe fetch wrapper
    --------------------------------------------------------- */
 async function apiGet(url) {
   try {
@@ -32,79 +32,61 @@ let REFRESH_INTERVAL = null;
 console.log(">>> dashboard.js LOADED");
 
 /* ---------------------------------------------------------
-   DOM Ready → setup
+   DOM READY
    --------------------------------------------------------- */
 document.addEventListener("DOMContentLoaded", () => {
   console.log(">>> dashboard.js DOMContentLoaded");
 
-  // =====================================================
-  // GLOBAL FALLBACK POLLING — ALWAYS FETCH LOGS
-  // Runs when no books exist or no selection has been made
-  // 
===================================================== - console.log(">>> dashboard.js: enabling global fallback polling"); + // Fallback: fetch global logs if no active book setInterval(() => { - // if no active book → fetch global logs - if (!ACTIVE_BOOK) { - refreshBook(null); // triggers /logs - } + if (!ACTIVE_BOOK) refreshBook(null); }, 2000); + // Sidebar items const items = $$(".book-list-item"); - console.log(">>> dashboard.js found book-list items:", items.length); - - // Geen boeken → geen polling starten - // if (!items || items.length === 0) { - // console.log(">>> dashboard.js: geen boeken aanwezig, polling uit."); - // return; - // } - - // Book selection listener items.forEach((item) => { item.addEventListener("click", () => { - console.log(">>> dashboard.js: user clicked book:", item.dataset.bookId); selectBook(item.dataset.bookId); }); }); - // Auto-select first book + // Auto-select if (!ACTIVE_BOOK && items[0]) { - console.log( - ">>> dashboard.js: auto-select first book:", - items[0].dataset.bookId - ); selectBook(items[0].dataset.bookId); } + + // Initial binding of book-card buttons + bindBookCardButtons(); + + // Refresh sidebar every 2 seconds + setInterval(refreshActiveBooks, 2800); }); /* --------------------------------------------------------- - Select a book (updates UI + starts polling) + Select a book --------------------------------------------------------- */ function selectBook(bookId) { - console.log(">>> selectBook(", bookId, ")"); - ACTIVE_BOOK = bookId; + console.log(">>> Selecting book", bookId); - // Highlight + // Highlight sidebar $$(".book-list-item").forEach((el) => { el.classList.toggle("active", el.dataset.bookId === bookId); }); - // Reset previous polling - if (REFRESH_INTERVAL) { - console.log(">>> dashboard.js: clearing previous polling interval"); - clearInterval(REFRESH_INTERVAL); - } + // Reset polling + if (REFRESH_INTERVAL) clearInterval(REFRESH_INTERVAL); - // Start new polling - console.log(">>> dashboard.js: starting polling for bookId =", bookId); REFRESH_INTERVAL = setInterval(() => { refreshBook(ACTIVE_BOOK); }, 2000); - // Immediate refresh refreshBook(ACTIVE_BOOK); } -setInterval(refreshActiveBooks, 2000); + +/* --------------------------------------------------------- + Refresh sidebar list + --------------------------------------------------------- */ async function refreshActiveBooks() { const books = await apiGet("/api/books"); if (!books) return; @@ -112,8 +94,8 @@ async function refreshActiveBooks() { const container = $("#book-list"); if (!container) return; - // Herbouw de lijst container.innerHTML = ""; + books.forEach((b) => { const div = document.createElement("div"); div.className = "book-list-item"; @@ -126,75 +108,152 @@ async function refreshActiveBooks() { ${b.download_done}/${b.download_total} downloaded, ${b.audio_done}/${b.audio_total} audio - - `; - // Event listener opnieuw koppelen div.addEventListener("click", () => selectBook(b.book_id)); - container.appendChild(div); }); - // Als ACTIVE_BOOK nog niet bekend → auto-selecteer eerste boek if (!ACTIVE_BOOK && books.length > 0) { selectBook(books[0].book_id); } } /* --------------------------------------------------------- - Fetch logs + progress from API + Fetch logs + progress --------------------------------------------------------- */ async function refreshBook(bookId) { - console.log(">>> refreshBook(", bookId, ")"); - - // 1) Als er GEEN bookId is → haal alleen globale logs op if (!bookId) { - console.log(">>> refreshBook: no active book → fetch 
/logs"); - const data = await apiGet("/logs"); - if (data && data.logs) updateLogs(data.logs); - - return; // klaar + if (data) updateLogs(data); + return; } - // 2) Als er WEL een boek is → haal book status + logs op const state = await apiGet(`/api/book/${bookId}/status`); const logs = await apiGet(`/api/book/${bookId}/logs`); - console.log(">>> refreshBook state =", state); - console.log(">>> refreshBook logs =", logs); - - if (state) updateProgressBars(state); + if (state) { + updateProgressBars(state); + refreshBookCards(); + } if (logs) updateLogs(logs); } /* --------------------------------------------------------- - Update LOG VIEW panel + BOOKCARD BUTTON BINDING — idempotent --------------------------------------------------------- */ -function updateLogs(logList) { - const output = $("#log-output"); - if (!output) { - console.warn(">>> updateLogs: no #log-output element found"); - return; - } +function bindBookCardButtons() { + console.log(">>> bindBookCardButtons() scanning…"); + + // START BUTTONS + document.querySelectorAll(".book-card .icon-start").forEach((btn) => { + if (btn.dataset.bound === "1") return; // prevent double-binding + btn.dataset.bound = "1"; + + btn.addEventListener("click", (ev) => { + ev.preventDefault(); + if (btn.disabled) return; + + const bookId = btn.closest(".book-card").dataset.bookId; + console.log(">>> START clicked:", bookId); + + startBook(bookId); + }); + }); + + // ABORT BUTTONS + document.querySelectorAll(".book-card .icon-abort").forEach((btn) => { + if (btn.dataset.bound === "1") return; + btn.dataset.bound = "1"; - output.innerHTML = ""; + btn.addEventListener("click", (ev) => { + ev.preventDefault(); + if (btn.disabled) return; - logList.forEach((line) => logAppend(line)); + const bookId = btn.closest(".book-card").dataset.bookId; + console.log(">>> ABORT clicked:", bookId); + + abortBookAjax(bookId); + }); + }); +} - autoScroll(output); +/* --------------------------------------------------------- + AJAX START + --------------------------------------------------------- */ +function startBook(bookId) { + console.log(">>> startBook():", bookId); + + fetch("/start", { + method: "POST", + headers: { "Content-Type": "application/x-www-form-urlencoded" }, + body: `book_id=${bookId}`, + }) + .then(async (r) => { + console.log(">>> /start status:", r.status); + + let data = null; + try { + data = await r.json(); + } catch (e) {} + + console.log(">>> /start response:", data); + refreshBookCards(); + refreshBook(bookId); + }) + .catch((err) => console.error("Start failed:", err)); } -function abortBook(book_id) { - if (!confirm(`Abort tasks for book ${book_id}?`)) return; +/* --------------------------------------------------------- + AJAX ABORT + --------------------------------------------------------- */ +function abortBookAjax(bookId) { + if (!confirm(`Abort tasks for book ${bookId}?`)) return; + + console.log(">>> abortBookAjax():", bookId); + + fetch(`/abort/${bookId}`, { method: "POST" }) + .then(async (r) => { + let data = null; + try { + data = await r.json(); + } catch (e) {} + console.log(">>> /abort response:", data); - fetch(`/abort/${book_id}`, { method: "POST" }) - .then((r) => r.json()) - .then((data) => { - console.log("Abort:", data); + refreshBookCards(); + refreshBook(bookId); }) - .catch((err) => { - console.error("Abort failed:", err); - }); + .catch((err) => console.error("Abort failed:", err)); +} + +/* --------------------------------------------------------- + Refresh all book-cards (status, classes, buttons) + 
+
+/* ---------------------------------------------------------
+   Refresh all book-cards (status, classes, buttons)
+   --------------------------------------------------------- */
+async function refreshBookCards() {
+  const books = await apiGet("/api/books");
+  if (!books) return;
+
+  document.querySelectorAll(".book-card").forEach((card) => {
+    const id = card.dataset.bookId;
+    const info = books.find((b) => b.book_id === id);
+    if (!info) return;
+
+    // Status CSS
+    card.className = `book-card ${info.status}`;
+
+    // Button states
+    const startBtn = card.querySelector(".icon-start");
+    const abortBtn = card.querySelector(".icon-abort");
+
+    if (startBtn) startBtn.disabled = info.status !== "registered";
+    if (abortBtn)
+      abortBtn.disabled = ![
+        "processing",
+        "downloading",
+        "parsing",
+        "audio",
+      ].includes(info.status);
+  });
+
+  bindBookCardButtons(); // rebind buttons on any newly rendered cards
+}
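Both refreshActiveBooks() and refreshBookCards() read the same /api/books payload. Inferred from the fields the code above dereferences, one entry looks roughly like the following — a sketch only; the real endpoint in app.py may return more fields:

// Sketch of one /api/books entry, inferred from usage above.
// Values are made up for illustration.
const exampleBook = {
  book_id: "abc123",        // hypothetical id; keys the data-book-id lookups
  title: "Example Title",
  status: "downloading",    // drives the card CSS class and button gating
  download_done: 42,
  download_total: 1200,
  audio_done: 10,
  audio_total: 1200,
};
// Gating rule applied by refreshBookCards():
//   start is enabled only while status === "registered";
//   abort is enabled while status is one of
//   "processing" | "downloading" | "parsing" | "audio".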
"block" : "none"; }); } /* --------------------------------------------------------- - UI bindings + DOM Ready — bind clear/filter --------------------------------------------------------- */ document.addEventListener("DOMContentLoaded", () => { console.log(">>> log_view.js DOMContentLoaded"); @@ -41,84 +39,107 @@ document.addEventListener("DOMContentLoaded", () => { const clearBtn = $("#log-clear"); const output = $("#log-output"); - if (!filterSel) { - console.log(">>> log_view.js: No log viewer found on this page."); + if (!output) { + console.log( + ">>> log_view.js: No #log-output on this page → viewer disabled" + ); return; } console.log(">>> log_view.js: log viewer detected."); - // Filter dropdown - // filterSel.addEventListener("change", () => { - // LOG_FILTER = filterSel.value; - // console.log(">>> log_view.js filter changed to:", LOG_FILTER); - // applyLogFilter(); - // }); + // Filter dropdown (currently disabled in your UI) + // if (filterSel) { + // filterSel.addEventListener("change", () => { + // LOG_FILTER = filterSel.value; + // applyLogFilter(); + // }); + // } - // Clear log window if (clearBtn) { clearBtn.addEventListener("click", () => { - console.log(">>> log_view.js log-clear clicked → clearing output"); - if (output) { - output.innerHTML = ""; - LAST_LOG_INDEX = -1; // reset delta polling - } + console.log(">>> log_view.js: Clear log viewer"); + output.innerHTML = ""; + LAST_LOG_INDEX = -1; // reset delta polling }); } }); /* --------------------------------------------------------- - Append + Rolling buffer + Append ONE line (smart class assignment) --------------------------------------------------------- */ -function logAppend(lineText) { +function rollingAppend(lineText) { const output = $("#log-output"); if (!output) return; const div = document.createElement("div"); div.classList.add("log-line"); - // ----------------------------------------------------- - // Assign subtype classes - // ----------------------------------------------------- - if (lineText.includes("[DL]") || lineText.includes("[DOWNLOAD]")) { + // Type detection + if (lineText.includes("[DL]") || lineText.includes("[DOWNLOAD]")) div.classList.add("dl"); - } else if (lineText.includes("[PARSE]")) { - div.classList.add("parse"); - } else if (lineText.includes("[SAVE]")) { - div.classList.add("save"); - } else if (lineText.includes("[AUDIO]")) { - div.classList.add("audio"); - } else if (lineText.includes("[CTRL]")) { - div.classList.add("ctrl"); - } else if (lineText.includes("[ERROR]")) { - div.classList.add("error"); - } else { - div.classList.add("default"); - } + else if (lineText.includes("[PARSE]")) div.classList.add("parse"); + else if (lineText.includes("[SAVE]")) div.classList.add("save"); + else if (lineText.includes("[AUDIO]")) div.classList.add("audio"); + else if (lineText.includes("[CTRL]")) div.classList.add("ctrl"); + else if (lineText.includes("[ERROR]")) div.classList.add("error"); + else div.classList.add("default"); + + div.textContent = lineText; - div.innerText = lineText; output.appendChild(div); - // Rolling buffer - while (output.children.length > MAX_LOG_LINES) { + // Rolling limit + while (output.childNodes.length > MAX_LOG_LINES) { output.removeChild(output.firstChild); } +} + +/* --------------------------------------------------------- + Primary API entry: updateLogs() + Used by dashboard.js AND delta polling + Accepts: + { logs: [...], last_index: N } + OR legacy: + { lines: [...], total: N } + --------------------------------------------------------- */ 
+
+/* ---------------------------------------------------------
+   Primary API entry: updateLogs()
+   Used by dashboard.js AND delta polling.
+   Accepts:
+       { logs: [...], last_index: N }
+     or legacy:
+       { lines: [...], total: N }
+   --------------------------------------------------------- */
+function updateLogs(packet) {
+  const output = $("#log-output");
+  if (!output) return;
+
+  if (!packet) return;
+
+  // Normalize the log array
+  let lines = packet.logs || packet.lines || [];
+  if (!Array.isArray(lines)) return;
+
+  // Append only the new lines
+  lines.forEach((line) => rollingAppend(line));
+
+  // Update the delta index
+  if (packet.last_index !== undefined) {
+    LAST_LOG_INDEX = packet.last_index;
+  } else if (packet.total !== undefined) {
+    LAST_LOG_INDEX = packet.total - 1;
+  }
 
   applyLogFilter();
   autoScroll(output);
 }
 
 /* ---------------------------------------------------------
-   Delta-based log polling
+   Delta polling: only the global log view uses this;
+   the dashboard fetches logs per book instead.
    --------------------------------------------------------- */
 function pollLogs() {
   fetch(`/logs?last_index=${LAST_LOG_INDEX}`)
     .then((r) => r.json())
     .then((data) => {
       const lines = data.lines || [];
+
       if (lines.length > 0) {
-        lines.forEach((line) => logAppend(line));
-        LAST_LOG_INDEX = data.total - 1;
+        lines.forEach((line) => rollingAppend(line));
+        LAST_LOG_INDEX = data.last; // server-reported index of the last line sent
       }
     })
     .catch((err) => {
@@ -126,7 +147,6 @@ function pollLogs() {
     });
 }
 
-// Poll every 800 ms
-setInterval(pollLogs, 1800);
+// Poll every 2.8 s
+setInterval(pollLogs, 2800);
 
 console.log(">>> log_view.js LOADED");
diff --git a/bookscraper/templates/components/bookcard.html b/bookscraper/templates/components/bookcard.html
new file mode 100644
index 0000000..9a88862
--- /dev/null
+++ b/bookscraper/templates/components/bookcard.html
@@ -0,0 +1,82 @@
+{# ============================================================
+   File: templates/components/bookcard.html
+   Purpose:
+     A single book card with:
+     - status styles
+     - cover
+     - metadata
+     - hide button
+     - start (play)
+     - abort (stop)
+   Requires:
+     variable "b" in context
+   ============================================================ #}
+
+ + +
+ +
+ + +
+ {% if b.cover_path %} + cover + {% else %} +
?
+ {% endif %} +
+ + +
+
{{ b.title }}
+
{{ b.author }}
+
Registered: {{ b.created_at }}
+ +
+ +
+ + +
+ + +
+ + +
+
+ +
+ +
diff --git a/bookscraper/templates/components/nav.html b/bookscraper/templates/components/nav.html index 1653bca..10e6485 100644 --- a/bookscraper/templates/components/nav.html +++ b/bookscraper/templates/components/nav.html @@ -1,16 +1,16 @@ diff --git a/bookscraper/templates/components/registered_books.html b/bookscraper/templates/components/registered_books.html new file mode 100644 index 0000000..09dafe2 --- /dev/null +++ b/bookscraper/templates/components/registered_books.html @@ -0,0 +1,21 @@ +{# ============================================================ File: +templates/components/registered_books.html Purpose: Toon een grid van +geregistreerde boeken. Elke kaart wordt gerenderd via bookcard.html. +============================================================ #} + +
+

Registered books

+ + {% if registered and registered|length > 0 %} + +
+ {% for b in registered %} {% include "components/bookcard.html" %} {% endfor + %} +
+ + {% else %} + +

No registered books.

+ + {% endif %} +
diff --git a/bookscraper/templates/components/url_input.html b/bookscraper/templates/components/url_input.html index 634e2f2..d69db52 100644 --- a/bookscraper/templates/components/url_input.html +++ b/bookscraper/templates/components/url_input.html @@ -5,7 +5,7 @@ Used on landing pages or detail pages. ======================================================================= --> -
+ + {% include "components/registered_books.html" %} + +
diff --git a/bookscraper/templates/debug/inspect_state.html b/bookscraper/templates/debug/inspect_state.html new file mode 100644 index 0000000..25a757b --- /dev/null +++ b/bookscraper/templates/debug/inspect_state.html @@ -0,0 +1,88 @@ +{% extends "layout.html" %} {% block content %} + +

State Inspection (SQL vs Redis)

+ + + +{% macro cmp(sqlval, redisval) %} {% if (sqlval|string) == (redisval|string) %} +{{ sqlval }} +{{ redisval }} +{% else %} +{{ sqlval }} +{{ redisval }} +{% endif %} {% endmacro %} {% for entry in results %} +
+
📘 {{ entry.book_id }}
+ + {% set sql = entry.sqlite %} {% set redis = entry.redis %} {% set merged = + entry.would_merge_to %} + + + + + + + + + + {% for field in [ "status", "chapters_total", "downloaded", + "chapters_download_done", "chapters_download_skipped", "parsed", + "chapters_parsed_done", "audio_done", "audio_skipped", "last_update" ] %} + + + + + + + + + + {% endfor %} +
FieldSQLiteRedisMerged Result
{{ field }}{{ sql.get(field, '') }}{{ redis.get(field, '') }}{{ merged.get(field, '') }}
+
+{% endfor %} {% endblock %} diff --git a/bookscraper/templates/debug/queues.html b/bookscraper/templates/debug/queues.html new file mode 100644 index 0000000..bb2dec9 --- /dev/null +++ b/bookscraper/templates/debug/queues.html @@ -0,0 +1,91 @@ +{% extends "layout.html" %} {% block content %} +

Celery Queue Debug

+ + + +
+

Workers

+ +

Active Tasks

+
{{ workers_active | tojson(indent=2) }}
+ +

Reserved

+
{{ workers_reserved | tojson(indent=2) }}
+ +

Scheduled

+
{{ workers_scheduled | tojson(indent=2) }}
+
+ +
+ +
+

Queues

+ + {% for q in queues %} +
+

{{ q.name }} ({{ q.length }} items)

+ + + + + + + + + + + + + + + + +
Redis Key{{ q.redis_key }}
Length{{ q.length }}
Items (first 30) + {% if q["items"] %} +
    + {% for item in q["items"] %} +
  • {{ item | e }}
  • + {% endfor %} +
+ {% else %} + No items + {% endif %} +
+
+ {% endfor %} +
+ + + +{% endblock %} diff --git a/bookscraper/templates/layout.html b/bookscraper/templates/layout.html index af2c248..14d6788 100644 --- a/bookscraper/templates/layout.html +++ b/bookscraper/templates/layout.html @@ -14,6 +14,12 @@ + + + diff --git a/bookscraper/worker/downloader.py b/bookscraper/worker/downloader.py index 6ad8378..e0216cb 100644 --- a/bookscraper/worker/downloader.py +++ b/bookscraper/worker/downloader.py @@ -5,7 +5,7 @@ import requests from io import BytesIO from bs4 import BeautifulSoup from scraper.logger import log_debug -from scraper.utils import clean_text +from scraper.utils.utils import clean_text from urllib.parse import urljoin @@ -103,8 +103,11 @@ class ChapterDownloader: collecting = True continue - text = sib.get_text("\n", strip=True) if hasattr( - sib, "get_text") else str(sib).strip() + text = ( + sib.get_text("\n", strip=True) + if hasattr(sib, "get_text") + else str(sib).strip() + ) if text: parts.append(text) @@ -121,6 +124,7 @@ class ChapterDownloader: vdir = f"{output_base}/v{volume}" import os + os.makedirs(vdir, exist_ok=True) fname = f"{number:05d}_{title}.txt"