Compare commits
2 Commits
f7f08fa45c
...
feb8ca60d7
| Author | SHA1 | Date |
|---|---|---|
|
|
feb8ca60d7 | 1 week ago |
|
|
292c9246a1 | 1 week ago |
@ -0,0 +1,79 @@
|
||||
# ============================================================
|
||||
# File: db/state_redis.py
|
||||
# Purpose:
|
||||
# Low-level Redis counters/state for BookScraper.
|
||||
# Used ONLY by db.repository façade.
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import time
|
||||
import redis
|
||||
|
||||
from logbus.publisher import log
|
||||
|
||||
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
||||
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# STATUS
|
||||
# ------------------------------------------------------------
|
||||
def redis_set_status(book_id: str, status: str):
    """Set the pipeline status for a book and refresh its last_update stamp.

    Both fields are written with a single mapping-form HSET instead of two
    separate round trips, so the hash is updated in one server call.
    """
    key = f"book:{book_id}:state"
    r.hset(key, mapping={"status": status, "last_update": int(time.time())})
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# TOTAL CHAPTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_set_chapters_total(book_id: str, total: int):
    """Record the total chapter count for a book and refresh last_update.

    Uses a single mapping-form HSET instead of two separate round trips.
    """
    key = f"book:{book_id}:state"
    r.hset(key, mapping={"chapters_total": total, "last_update": int(time.time())})
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# DOWNLOAD COUNTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_inc_download_done(book_id: str, amount: int = 1):
    """Increment the downloaded-chapters counter for a book in Redis."""
    state_key = f"book:{book_id}:state"
    now = int(time.time())
    r.hincrby(state_key, "chapters_download_done", amount)
    r.hset(state_key, "last_update", now)
|
||||
|
||||
|
||||
def redis_inc_download_skipped(book_id: str, amount: int = 1):
    """Increment the skipped-downloads counter for a book in Redis."""
    log(f"[DB-REDIS] Incrementing download skipped for {book_id} by {amount}")
    state_key = f"book:{book_id}:state"
    now = int(time.time())
    r.hincrby(state_key, "chapters_download_skipped", amount)
    r.hset(state_key, "last_update", now)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PARSE COUNTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_inc_parsed_done(book_id: str, amount: int = 1):
    """Increment the parsed-chapters counter for a book in Redis."""
    state_key = f"book:{book_id}:state"
    now = int(time.time())
    r.hincrby(state_key, "chapters_parsed_done", amount)
    r.hset(state_key, "last_update", now)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# AUDIO COUNTERS
|
||||
# ------------------------------------------------------------
|
||||
def redis_inc_audio_done(book_id: str, amount: int = 1):
    """Increment the completed-audio counter for a book in Redis."""
    log(f"[DB-REDIS] Incrementing audio done for {book_id} by {amount}")
    state_key = f"book:{book_id}:state"
    now = int(time.time())
    r.hincrby(state_key, "audio_done", amount)
    r.hset(state_key, "last_update", now)
|
||||
|
||||
|
||||
def redis_inc_audio_skipped(book_id: str, amount: int = 1):
    """Count skipped audio chapters (timeouts, pre-existing files, abort, etc.).

    SQL does NOT track this; it is a Redis-only metric.
    """
    # Fix: in the original, this text sat AFTER the log() call, making it a
    # discarded string expression rather than a real docstring.
    log(f"[DB-REDIS] Incrementing audio skipped for {book_id} by {amount}")
    key = f"book:{book_id}:state"
    r.hincrby(key, "audio_skipped", amount)
    r.hset(key, "last_update", int(time.time()))
|
||||
@ -0,0 +1,165 @@
|
||||
# ============================================================
|
||||
# File: db/state_sql.py
|
||||
# Purpose:
|
||||
# Low-level SQLite snapshot layer for BookScraper metadata.
|
||||
# Used ONLY through db.repository façade.
|
||||
# ============================================================
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
from logbus.publisher import log
|
||||
|
||||
DB_PATH = os.getenv("BOOKSCRAPER_DB", "/app/db/books.db")
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# INTERNAL HELPERS
|
||||
# ------------------------------------------------------------
|
||||
def _connect():
    """Open a SQLite connection to DB_PATH with dict-like row access."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# FETCH
|
||||
# ------------------------------------------------------------
|
||||
def sql_fetch_book(book_id):
    """Return the books row for *book_id* as a dict, or None if absent.

    The connection is closed in a finally block so it is not leaked when
    the query raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute("SELECT * FROM books WHERE book_id = ?", (book_id,))
        row = cur.fetchone()
        return dict(row) if row else None
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sql_fetch_all_books():
    """Return all books rows (newest first by rowid) as a list of dicts.

    The connection is closed in a finally block so it is not leaked when
    the query raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute("SELECT * FROM books ORDER BY rowid DESC")
        return [dict(r) for r in cur.fetchall()]
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# REGISTER / UPDATE
|
||||
# ------------------------------------------------------------
|
||||
def sql_register_book(book_id, fields: dict):
    """Insert or replace a books row for *book_id* with the given columns.

    NOTE(review): column names come from fields.keys() and are interpolated
    into the SQL string — callers must pass only trusted, fixed column names.
    Values are bound safely via placeholders.

    The connection is closed in a finally block so it is not leaked when
    the insert raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()

        cols = ", ".join(["book_id"] + list(fields.keys()))
        placeholders = ", ".join(["?"] * (1 + len(fields)))
        values = [book_id] + list(fields.values())

        cur.execute(
            f"INSERT OR REPLACE INTO books ({cols}) VALUES ({placeholders})", values
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sql_update_book(book_id, fields: dict):
    """Update arbitrary columns of a books row; no-op when fields is empty.

    NOTE(review): column names from fields.keys() are interpolated into the
    SQL string — callers must pass only trusted, fixed column names.

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    if not fields:
        return

    conn = _connect()
    try:
        cur = conn.cursor()

        set_clause = ", ".join([f"{k} = ?" for k in fields])
        params = list(fields.values()) + [book_id]

        cur.execute(f"UPDATE books SET {set_clause} WHERE book_id = ?", params)
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# STATUS
|
||||
# ------------------------------------------------------------
|
||||
def sql_set_status(book_id, status: str):
    """Set the status column of a books row.

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute("UPDATE books SET status = ? WHERE book_id = ?", (status, book_id))
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# CHAPTER TOTAL (snapshot)
|
||||
# ------------------------------------------------------------
|
||||
def sql_set_chapters_total(book_id, total: int):
    """Snapshot the total chapter count into the books row.

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute(
            "UPDATE books SET chapters_total = ? WHERE book_id = ?", (total, book_id)
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# COUNTERS (SNAPSHOT-ONLY)
|
||||
# ------------------------------------------------------------
|
||||
def sql_inc_downloaded(book_id, amount=1):
    """Increment the downloaded counter (treating NULL as 0).

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute(
            """
            UPDATE books
            SET downloaded = COALESCE(downloaded,0) + ?
            WHERE book_id = ?
            """,
            (amount, book_id),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sql_inc_parsed(book_id, amount=1):
    """Increment the parsed counter (treating NULL as 0).

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute(
            """
            UPDATE books
            SET parsed = COALESCE(parsed,0) + ?
            WHERE book_id = ?
            """,
            (amount, book_id),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sql_inc_audio_done(book_id, amount=1):
    """Increment the audio_done counter (treating NULL as 0).

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    log(f"[DB-SQL] Incrementing audio done for {book_id} by {amount}")
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute(
            """
            UPDATE books
            SET audio_done = COALESCE(audio_done,0) + ?
            WHERE book_id = ?
            """,
            (amount, book_id),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sql_inc_audio_skipped(book_id, amount=1):
    """Increment the audio_skipped counter (treating NULL as 0).

    The connection is closed in a finally block so it is not leaked when
    the update raises.
    """
    log(f"[DB-SQL] Incrementing audio skipped for {book_id} by {amount}")
    conn = _connect()
    try:
        cur = conn.cursor()
        cur.execute(
            """
            UPDATE books
            SET audio_skipped = COALESCE(audio_skipped,0) + ?
            WHERE book_id = ?
            """,
            (amount, book_id),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
@ -0,0 +1,27 @@
|
||||
# ============================================================
|
||||
# File: scraper/engine/fetcher.py
|
||||
# Purpose:
|
||||
# Low-level HTML fetch utility shared by all site scrapers.
|
||||
# Replaces scattered _fetch() logic inside BookScraper.
|
||||
# ============================================================
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:118.0) "
|
||||
"Gecko/20100101 Firefox/118.0"
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
def fetch_html(url: str, encoding: str = "utf-8", timeout: int = 10) -> BeautifulSoup:
    """
    Fetch HTML with a consistent user-agent and forced response encoding.

    Raises requests.HTTPError on 4xx/5xx responses — previously error pages
    were silently parsed as if they were book content.

    Returns BeautifulSoup(lxml).
    """
    resp = requests.get(url, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()  # fail fast instead of parsing an error page
    resp.encoding = encoding
    return BeautifulSoup(resp.text, "lxml")
|
||||
@ -0,0 +1,65 @@
|
||||
# ============================================================
|
||||
# File: scraper/engine/parser.py
|
||||
# Purpose:
|
||||
# High-level scraping API coordinating metadata extraction
|
||||
# and chapter extraction using pluggable SiteScraper classes.
|
||||
#
|
||||
# This is the new central engine:
|
||||
# - extract_metadata_only() used by INIT flow
|
||||
# - extract_metadata_full() used by full scraping pipeline
|
||||
# ============================================================
|
||||
|
||||
from scraper.engine.fetcher import fetch_html
|
||||
|
||||
|
||||
def extract_metadata_only(url: str, site_scraper):
    """
    Extract ONLY lightweight metadata (no chapter list):
        title, author, description, cover_url,
        chapters_total fixed at 0, and the original book_url.
    """
    soup = fetch_html(url, site_scraper.encoding)

    return {
        "title": site_scraper.parse_title(soup),
        "author": site_scraper.parse_author(soup),
        "description": site_scraper.parse_description(soup),
        "cover_url": site_scraper.parse_cover(soup, url),
        "chapters_total": 0,
        "book_url": url,
    }
|
||||
|
||||
|
||||
def extract_metadata_full(url: str, site_scraper):
    """
    Full scraping (metadata + chapter list), used by the scraping Celery
    pipeline.

    Fix: the book page is fetched ONCE and reused for both the metadata
    fields and the chapter-page URL. The previous version fetched the same
    URL twice (once here and once inside extract_metadata_only).
    """
    soup = fetch_html(url, site_scraper.encoding)

    # metadata — parsed from the soup we already have (same shape as
    # extract_metadata_only's return value)
    meta = {
        "title": site_scraper.parse_title(soup),
        "author": site_scraper.parse_author(soup),
        "description": site_scraper.parse_description(soup),
        "cover_url": site_scraper.parse_cover(soup, url),
        "chapters_total": 0,
        "book_url": url,
    }

    # chapter list
    chapter_page_url = site_scraper.extract_chapter_page_url(soup)
    chapter_page_soup = fetch_html(chapter_page_url, site_scraper.encoding)
    meta["chapters"] = site_scraper.parse_chapter_list(chapter_page_soup)

    return meta
|
||||
|
||||
|
||||
def build_book_id(title: str) -> str:
    """
    Canonical book_id generator.

    SCRAPE currently uses the title itself as the ID — that behavior is
    preserved verbatim.
    """
    book_id = title
    return book_id
|
||||
@ -0,0 +1,33 @@
|
||||
# ============================================================
|
||||
# File: scraper/logger_decorators.py
|
||||
# Purpose: Function-call logging decorator
|
||||
# ============================================================
|
||||
|
||||
from functools import wraps
|
||||
from scraper.logger import log_debug
|
||||
|
||||
|
||||
def logcall(func):
    """
    Decorator: log the function name + positional arguments on every call.

    Usage: @logcall above any function.

    kwargs are intentionally omitted from the log line to keep it short
    (matching the previously active log format).
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Log before execution so a crash inside func still shows the call.
        log_debug(f"[CALL] {func.__qualname__} args={args}")
        return func(*args, **kwargs)

    return wrapper
|
||||
@ -0,0 +1,45 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/cover_service.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import requests
|
||||
from logbus.publisher import log
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class CoverService:
    """Downloads and stores book cover images for the UI."""

    @staticmethod
    def download_main_cover(cover_url: str, book_id: str) -> Optional[str]:
        """
        Download the cover image into static/covers/<book_id>.jpg.
        Returns the local path on success, or None when there is no URL
        or the download fails (best-effort: failures are logged, not raised).
        """
        if not cover_url:
            log(f"[COVER] No cover URL for book={book_id}")
            return None

        covers_dir = os.path.join("static", "covers")
        os.makedirs(covers_dir, exist_ok=True)
        target = os.path.join(covers_dir, f"{book_id}.jpg")

        try:
            log(f"[COVER] Downloading: {cover_url}")

            response = requests.get(
                cover_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"}
            )
            response.raise_for_status()

            with open(target, "wb") as fh:
                fh.write(response.content)

            log(f"[COVER] Stored: {target}")
            return target

        except Exception as e:
            log(f"[COVER] FAILED ({cover_url}) → {e}")
            return None
|
||||
@ -0,0 +1,81 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/init_service.py
|
||||
# Purpose:
|
||||
# Orchestrate INIT-flow:
|
||||
# - resolve site
|
||||
# - fetch minimal metadata
|
||||
# - derive book_id
|
||||
# - register in SQLite
|
||||
# - store main cover
|
||||
# ============================================================
|
||||
|
||||
import re
|
||||
from scraper.services.site_resolver import SiteResolver
|
||||
from scraper.services.scrape_engine import ScrapeEngine
|
||||
from scraper.services.cover_service import CoverService
|
||||
|
||||
from db.repository import register_book
|
||||
|
||||
from scraper.logger_decorators import logcall
|
||||
|
||||
|
||||
class InitService:
    # Orchestrates the INIT flow: resolve site → fetch minimal metadata →
    # derive book_id → download cover → register in SQLite.

    @staticmethod
    @logcall
    def derive_book_id(url: str) -> str:
        """
        Derive a stable book_id from the source URL.

        PTWXZ URL format ends with /{id}.html → use the numeric id.
        If no match → fallback to the URL with '/' replaced by '_'.
        """
        m = re.search(r"/(\d+)\.html$", url)
        if m:
            return m.group(1)
        return url.replace("/", "_")

    @staticmethod
    @logcall
    def execute(url: str) -> dict:
        """
        Main INIT-flow entry point.
        Returns complete metadata + registration info for the UI.
        """

        # 1) Determine site implementation for this URL
        site = SiteResolver.resolve(url)

        book_id = InitService.derive_book_id(url)

        # Attach the derived id so downstream steps can use it
        site.book_id = book_id
        # 2) Lightweight metadata only (no chapter list)
        meta = ScrapeEngine.fetch_metadata_only(site, url)

        title = meta.get("title") or "Unknown"
        author = meta.get("author")
        description = meta.get("description")
        cover_url = meta.get("cover_url")

        # 3) Download UI cover (capture the returned local path)
        cover_path = CoverService.download_main_cover(cover_url, book_id)

        # 4) SQLite registration INCLUDING cover_path
        register_book(
            book_id=book_id,
            title=title,
            author=author,
            description=description,
            cover_url=cover_url,
            cover_path=cover_path,  # IMPORTANT: persisted for the UI
            book_url=url,
        )

        # 5) Output for the UI
        return {
            "book_id": book_id,
            "title": title,
            "author": author,
            "description": description,
            "cover_url": cover_url,
            "cover_path": cover_path,  # convenient for the UI
            "status": "registered",
        }
|
||||
@ -0,0 +1,20 @@
|
||||
# ============================================================
|
||||
# File: scraper/services/site_resolver.py
|
||||
# Purpose:
|
||||
# Determine which BookSite implementation applies for a given URL.
|
||||
# This keeps INIT-flow and SCRAPE-flow site-agnostic.
|
||||
# ============================================================
|
||||
|
||||
from scraper.sites import BookSite # current PTWXZ implementation
|
||||
|
||||
|
||||
class SiteResolver:
    """
    Resolves the correct BookSite class based on URL.
    Currently only PTWXZ/Piaotian is supported.
    """

    @staticmethod
    def resolve(url: str):
        # The URL is currently ignored: every request maps to the single
        # supported site. Add per-domain rules here when more sources land.
        return BookSite()
|
||||
@ -0,0 +1,28 @@
|
||||
# ============================================================
|
||||
# File: scraper/sites/__init__.py
|
||||
# Purpose:
|
||||
# Site autodetection based on URL.
|
||||
# ============================================================
|
||||
|
||||
from scraper.sites.piaotian import PiaotianScraper
|
||||
|
||||
|
||||
def get_scraper_for_url(url: str):
    """
    Return the correct scraper instance for a given URL.
    Later: add more site implementations.

    Raises ValueError when no scraper recognizes the URL.
    """
    for marker in ("ptwxz", "piaotian"):
        if marker in url:
            return PiaotianScraper()

    raise ValueError(f"No scraper available for URL: {url}")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Backwards-compatibility export for legacy BookScraper
|
||||
# ============================================================
|
||||
# Old code expects:
|
||||
# from scraper.sites import BookSite
|
||||
# We map that to our new PiaotianScraper implementation.
|
||||
|
||||
BookSite = PiaotianScraper
|
||||
@ -0,0 +1,52 @@
|
||||
# ============================================================
|
||||
# File: scraper/sites/base.py
|
||||
# Purpose:
|
||||
# Abstract interface that every site-specific scraper must implement.
|
||||
# ============================================================
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SiteScraper(ABC):
    """
    Defines the interface for site-specific scrapers.
    Each concrete scraper (Piaotian, Biquge, etc.) must implement these.
    """

    # Base URL of the site (presumably used to resolve relative links —
    # TODO confirm against concrete implementations).
    @property
    @abstractmethod
    def root(self) -> str: ...

    # Character encoding of the site's pages (e.g. "gbk", "utf-8").
    @property
    @abstractmethod
    def encoding(self) -> str: ...

    # CSS selector locating the chapter list on the chapter page.
    @property
    @abstractmethod
    def chapter_list_selector(self) -> str: ...

    # --------------------------
    # Metadata extraction
    # --------------------------
    @abstractmethod
    def parse_title(self, soup: BeautifulSoup) -> str: ...

    @abstractmethod
    def parse_author(self, soup: BeautifulSoup) -> str: ...

    @abstractmethod
    def parse_description(self, soup: BeautifulSoup) -> str: ...

    # Returns an absolute cover-image URL, or None when the page has none.
    @abstractmethod
    def parse_cover(self, soup: BeautifulSoup, url: str) -> Optional[str]: ...

    # --------------------------
    # Chapter extraction
    # --------------------------
    # Given the book's main page, return the URL of its chapter-index page.
    @abstractmethod
    def extract_chapter_page_url(self, soup: BeautifulSoup) -> str: ...

    # Given the chapter-index page, return the list of chapters.
    @abstractmethod
    def parse_chapter_list(self, soup: BeautifulSoup) -> list: ...
|
||||
@ -1,57 +0,0 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/progress_tasks.py
|
||||
# Purpose: Central progress updater for chapter pipelines.
|
||||
# Updated for chapter_dict pipeline model.
|
||||
# ============================================================
|
||||
|
||||
from celery_app import celery_app
|
||||
from scraper.progress import inc_completed, inc_skipped, inc_failed
|
||||
from logbus.publisher import log
|
||||
|
||||
print(">>> [IMPORT] progress_tasks.py loaded")
|
||||
|
||||
|
||||
@celery_app.task(bind=False, name="progress.update", queue="controller")
def update_progress(result: dict, book_id: str):
    """
    Central progress logic for the chapter pipeline.

    Args:
        result: output dict of save_chapter (may contain "chapter",
                "skipped", "failed").
        book_id: explicitly passed by the pipeline.

    IMPORTANT:
        - save_chapter already updates counters for skipped & normal chapters
        - progress.update MUST NOT double-increment; only the failed case
          increments a counter here.

    Returns the result dict unchanged so it can flow down the chain.
    """

    ch = result.get("chapter") or {}
    chapter_num = ch.get("num")

    skipped = result.get("skipped", False)
    failed = result.get("failed", False)

    # ------------------------------------------------------------
    # FAILED CASE — the only counter updated by this task
    # ------------------------------------------------------------
    if failed:
        inc_failed(book_id)
        log(f"[PROG] FAILED chapter {chapter_num}")
        return result

    # ------------------------------------------------------------
    # SKIPPED CASE — counter already bumped by save_chapter
    # ------------------------------------------------------------
    if skipped:
        # save_chapter already did:
        #   inc_skipped(book_id)
        log(f"[PROG] SKIPPED chapter {chapter_num}")
        return result

    # ------------------------------------------------------------
    # NORMAL COMPLETION — save_chapter already did inc_completed,
    # so only log here to avoid double counting.
    # ------------------------------------------------------------
    log(f"[PROG] DONE chapter {chapter_num}")

    return result
|
||||
@ -1,139 +1,83 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/save_tasks.py
|
||||
# Purpose: Save parsed chapter text to disk + trigger audio.
|
||||
# Updated for chapter_dict + book_meta pipeline model.
|
||||
# File: scraper/tasks/save_tasks.py (RESTORED AUDIO LOGIC)
|
||||
# ============================================================
|
||||
|
||||
print(">>> [IMPORT] save_tasks.py loaded")
|
||||
|
||||
from celery import shared_task
|
||||
import os
|
||||
from scraper.utils import get_save_path
|
||||
from scraper.tasks.download_tasks import log_msg # unified logger
|
||||
from scraper.progress import (
|
||||
inc_completed,
|
||||
inc_chapter_done,
|
||||
inc_chapter_download_skipped,
|
||||
)
|
||||
|
||||
from logbus.publisher import log
|
||||
from scraper.logger_decorators import logcall
|
||||
from scraper.utils.utils import get_save_path
|
||||
from scraper.tasks.download_tasks import log_msg
|
||||
from scraper.tasks.audio_tasks import generate_audio
|
||||
|
||||
from db.repository import inc_download_done, inc_download_skipped
|
||||
|
||||
|
||||
@shared_task(bind=True, queue="save", ignore_result=False)
@logcall
def save_chapter(self, payload: dict):
    """
    Save a parsed chapter to disk and queue audio generation.

    NOTE(review): this hunk interleaved the OLD (parsed-dict) and NEW
    (payload-dict) versions of the function; this is the reconstructed
    new-side version.

    payload is expected to contain:
        book_id: str
        chapter: dict with "num", optional "title", "volume_path"
        parsed:  chapter text, or falsy when nothing was parsed
        path:    optional pre-existing file path (skipped case)
        skipped: bool — True when the download step skipped this chapter

    Returns the payload, enriched with "path"/"skipped" on a normal save.
    """

    if not payload:
        log("[SAVE] ERROR: payload is None")
        return {"error": True}

    book_id = payload["book_id"]
    chapter = payload["chapter"]
    parsed = payload.get("parsed")
    path = payload.get("path")
    skipped = payload.get("skipped")

    num = chapter["num"]
    title = chapter.get("title") or f"Chapter {num}"
    volume = chapter.get("volume_path")
    volume_name = os.path.basename(volume.rstrip("/"))

    # ============================================================
    # SKIPPED CASE (restore old behavior)
    # ============================================================
    if skipped or not parsed:
        log_msg(book_id, f"[SAVE] SKIP chapter {num}")
        inc_download_skipped(book_id)

        # Restore old behavior:
        # If the chapter file already exists, STILL trigger audio so a
        # re-run can fill in missing audio for previously saved chapters.
        if path and os.path.exists(path):
            log_msg(book_id, f"[AUDIO] Queueing audio for SKIPPED chapter {num}")
            try:
                generate_audio.delay(book_id, volume_name, num, title, path)
            except Exception as exc:
                log_msg(book_id, f"[AUDIO] ERROR queueing skipped audio: {exc}")

        return payload

    # ============================================================
    # NORMAL SAVE CASE
    # ============================================================
    try:
        os.makedirs(volume, exist_ok=True)
        save_path = get_save_path(num, volume)

        # Write chapter text to file
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(parsed)

        log_msg(book_id, f"[SAVE] Saved chapter {num} → {save_path}")

        inc_download_done(book_id)

        # Restore old behavior → ALWAYS queue audio
        try:
            generate_audio.delay(book_id, volume_name, num, title, save_path)
            log_msg(book_id, f"[AUDIO] Task queued for chapter {num}")
        except Exception as exc:
            log_msg(book_id, f"[AUDIO] ERROR queueing chapter {num}: {exc}")

        payload["path"] = save_path
        payload["skipped"] = False
        return payload

    except Exception as exc:
        log_msg(book_id, f"[SAVE] ERROR saving chapter {num}: {exc}")
        raise
|
||||
|
||||
@ -0,0 +1,141 @@
|
||||
# ============================================================
|
||||
# File: scraper/utils/state_sync.py
|
||||
# Purpose:
|
||||
# State inspection + optional sync logic for book progress.
|
||||
# This version provides:
|
||||
# • inspect_books_state() → NO writes, just a dry-run
|
||||
# • sync_books_from_redis() → NOT USED YET (kept commented)
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import redis
|
||||
from db.db import get_db
|
||||
|
||||
|
||||
def inspect_books_state():
    """
    Read all books from SQLite and fetch Redis progress, performing NO
    writes. For each book it reports:
        - the sqlite row
        - the redis state
        - the merged result (dry-run)

    Returns a list of inspection dicts.

    Fix: os.getenv("REDIS_BROKER") could be None, which makes
    redis.Redis.from_url crash; use the same default URL as db/state_redis.py.
    """
    r = redis.Redis.from_url(os.getenv("REDIS_BROKER", "redis://redis:6379/0"))
    db = get_db()
    cur = db.cursor()

    cur.execute("SELECT * FROM books")
    rows = cur.fetchall()

    results = []

    for row in rows:
        book_id = row["book_id"]
        sqlite_row = dict(row)

        # Read redis state (bytes → str; this client has no decode_responses)
        redis_key = f"book:{book_id}:state"
        progress = r.hgetall(redis_key)

        if progress:
            decoded = {k.decode(): v.decode() for k, v in progress.items()}
        else:
            decoded = {}

        # Determine dry-run merged result: Redis counters win when present
        merged = sqlite_row.copy()

        if decoded:
            merged["downloaded"] = int(
                decoded.get("download_done", merged.get("downloaded", 0))
            )
            merged["parsed"] = int(decoded.get("parsed_done", merged.get("parsed", 0)))
            merged["audio_done"] = int(
                decoded.get("audio_done", merged.get("audio_done", 0))
            )
            merged["chapters_total"] = int(
                decoded.get("chapters_total", merged.get("chapters_total", 0))
            )
            merged["status"] = decoded.get("status", merged.get("status", "unknown"))

        results.append(
            {
                "book_id": book_id,
                "sqlite": sqlite_row,
                "redis": decoded,
                "would_merge_to": merged,
            }
        )

    return results
|
||||
|
||||
|
||||
def sync_books_from_redis():
    """
    Read all books from SQLite, fetch Redis progress, and update the
    SQLite rows accordingly (Redis counters and status win).

    Returns a list of {
        "book_id": ...,
        "before": ...,
        "redis": ...,
        "after": ...
    }

    Fix: os.getenv("REDIS_BROKER") could be None, which makes
    redis.Redis.from_url crash; use the same default URL as db/state_redis.py.
    """
    r = redis.Redis.from_url(os.getenv("REDIS_BROKER", "redis://redis:6379/0"))
    db = get_db()
    cur = db.cursor()

    # Fetch all books
    cur.execute("SELECT * FROM books")
    rows = cur.fetchall()

    results = []

    for row in rows:
        book_id = row["book_id"]
        before = dict(row)

        redis_key = f"book:{book_id}:state"
        progress = r.hgetall(redis_key)

        # No Redis state → report the row unchanged
        if not progress:
            results.append(
                {"book_id": book_id, "before": before, "redis": {}, "after": before}
            )
            continue

        # Decode Redis bytes → string dictionary
        decoded = {k.decode(): v.decode() for k, v in progress.items()}

        # Extract counters (missing keys default to 0)
        downloaded = int(decoded.get("download_done", 0))
        parsed = int(decoded.get("parsed_done", 0))
        audio_done = int(decoded.get("audio_done", 0))
        chapters_total = int(decoded.get("chapters_total", 0))

        # Redis status wins
        status = decoded.get("status", before["status"])

        # Write back to SQLite (committed per row so partial progress
        # survives a crash mid-sync)
        cur.execute(
            """
            UPDATE books
            SET downloaded = ?, parsed = ?, audio_done = ?, chapters_total = ?, status = ?, last_update = datetime('now')
            WHERE book_id = ?
            """,
            (downloaded, parsed, audio_done, chapters_total, status, book_id),
        )
        db.commit()

        # Fetch updated row for the report
        cur.execute("SELECT * FROM books WHERE book_id = ?", (book_id,))
        after = dict(cur.fetchone())

        results.append(
            {"book_id": book_id, "before": before, "redis": decoded, "after": after}
        )

    return results
|
||||
|
Before Width: | Height: | Size: 4.1 KiB |
|
Before Width: | Height: | Size: 6.4 KiB |
|
Before Width: | Height: | Size: 3.5 KiB |
|
Before Width: | Height: | Size: 12 KiB |
@ -0,0 +1,201 @@
|
||||
/* =======================================================================
|
||||
File: static/css/bookcard.css
|
||||
Purpose:
|
||||
All styling for registered book cards (book-card) +
|
||||
status colors + start/abort buttons.
|
||||
======================================================================= */
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
GRID WRAPPER FOR REGISTERED BOOKS
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.registered-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(340px, 1fr));
|
||||
gap: 20px;
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
MAIN BOOK CARD
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-card {
|
||||
position: relative;
|
||||
display: grid;
|
||||
grid-template-columns: 90px auto;
|
||||
gap: 15px;
|
||||
|
||||
padding: 15px;
|
||||
background: #fff;
|
||||
border-radius: 10px;
|
||||
border: 1px solid #e5e5e5;
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
|
||||
|
||||
transition: border-color 0.25s ease, box-shadow 0.25s ease;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
BOOK STATUS COLORS
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-card.processing {
|
||||
border-color: #007aff;
|
||||
box-shadow: 0 0 6px rgba(0, 122, 255, 0.35);
|
||||
}
|
||||
|
||||
.book-card.downloading {
|
||||
border-color: #ff9500;
|
||||
box-shadow: 0 0 6px rgba(255, 149, 0, 0.35);
|
||||
}
|
||||
|
||||
.book-card.parsing {
|
||||
border-color: #ffcc00;
|
||||
box-shadow: 0 0 6px rgba(255, 204, 0, 0.35);
|
||||
}
|
||||
|
||||
.book-card.audio {
|
||||
border-color: #e65100;
|
||||
box-shadow: 0 0 6px rgba(230, 81, 0, 0.35);
|
||||
}
|
||||
|
||||
.book-card.completed {
|
||||
border-color: #34c759;
|
||||
box-shadow: 0 0 6px rgba(52, 199, 89, 0.35);
|
||||
}
|
||||
|
||||
.book-card.aborted {
|
||||
border-color: #ff3b30;
|
||||
box-shadow: 0 0 6px rgba(255, 59, 48, 0.35);
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
COVER IMAGE
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-cover {
|
||||
width: 90px;
|
||||
}
|
||||
|
||||
.book-img {
|
||||
width: 90px;
|
||||
height: 130px;
|
||||
object-fit: cover;
|
||||
border-radius: 4px;
|
||||
background: #f4f4f4;
|
||||
}
|
||||
|
||||
.placeholder {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
color: #777;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
META INFORMATION
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.book-title {
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.book-author {
|
||||
font-size: 14px;
|
||||
color: #444;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.book-created {
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
ICON BUTTONS
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.icon-btn {
|
||||
width: 34px;
|
||||
height: 34px;
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
|
||||
font-size: 16px;
|
||||
color: #fff;
|
||||
cursor: pointer;
|
||||
|
||||
transition: background 0.15s ease, transform 0.1s ease;
|
||||
}
|
||||
|
||||
/* Start (green) */
|
||||
.icon-start {
|
||||
background: #2d8a3d;
|
||||
}
|
||||
.icon-start:hover {
|
||||
background: #226c30;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
|
||||
.icon-start:disabled {
|
||||
background: #9bbb9f !important;
|
||||
cursor: not-allowed;
|
||||
transform: none;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
/* Abort (red) */
|
||||
.icon-abort {
|
||||
background: #c62828;
|
||||
}
|
||||
.icon-abort:hover {
|
||||
background: #a31f1f;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
|
||||
.icon-abort:disabled {
|
||||
background: #d8a0a0 !important;
|
||||
cursor: not-allowed;
|
||||
transform: none;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
/* Hide button (gray) */
|
||||
.hide-form {
|
||||
position: absolute;
|
||||
top: 6px;
|
||||
right: 6px;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.icon-hide {
|
||||
background: #777;
|
||||
}
|
||||
.icon-hide:hover {
|
||||
background: #555;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
/* -----------------------------------------------------------------------
|
||||
BOOK ACTIONS (right aligned button row)
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
.book-actions {
|
||||
display: flex;
|
||||
justify-content: flex-end; /* rechts uitlijnen */
|
||||
gap: 10px; /* ruimte tussen knoppen */
|
||||
margin-top: 12px;
|
||||
}
|
||||
@ -0,0 +1,82 @@
|
||||
{# ============================================================
|
||||
File: templates/components/bookcard.html
|
||||
Purpose:
|
||||
Eén enkele boekkaart met:
|
||||
- status styles
|
||||
- cover
|
||||
- metadata
|
||||
- hide button
|
||||
- start (play)
|
||||
- abort (stop)
|
||||
Requires:
|
||||
variable "b" in context
|
||||
============================================================ #}
|
||||
|
||||
<div class="book-card {{ b.status }}" data-book-id="{{ b.book_id }}">
|
||||
|
||||
<!-- ======================================================
|
||||
HIDE BUTTON (icon-only)
|
||||
====================================================== -->
|
||||
<form
|
||||
action="/hide/{{ b.book_id }}"
|
||||
method="POST"
|
||||
onsubmit="return confirm('Dit boek verbergen?')"
|
||||
class="hide-form"
|
||||
>
|
||||
<button class="icon-btn icon-hide" title="Verbergen">
|
||||
<i class="fa-solid fa-xmark"></i>
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<!-- ======================================================
|
||||
COVER
|
||||
====================================================== -->
|
||||
<div class="book-cover">
|
||||
{% if b.cover_path %}
|
||||
<img src="/{{ b.cover_path }}" alt="cover" class="book-img" />
|
||||
{% else %}
|
||||
<div class="book-img placeholder">?</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- ======================================================
|
||||
META + BUTTONS
|
||||
====================================================== -->
|
||||
<div class="book-meta">
|
||||
<div class="book-title">{{ b.title }}</div>
|
||||
<div class="book-author">{{ b.author }}</div>
|
||||
<div class="book-created">Geregistreerd: {{ b.created_at }}</div>
|
||||
|
||||
<div class="book-actions">
|
||||
<!-- START -->
|
||||
<form action="/start" method="POST">
|
||||
<input type="hidden" name="book_id" value="{{ b.book_id }}" />
|
||||
<button
|
||||
class="icon-btn icon-start"
|
||||
title="Start scraping"
|
||||
{% if b.status != "registered" %}
|
||||
disabled
|
||||
{% endif %}
|
||||
>
|
||||
<i class="fa-solid fa-play"></i>
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<!-- ABORT -->
|
||||
<form action="/abort/{{ b.book_id }}" method="POST">
|
||||
<input type="hidden" name="book_id" value="{{ b.book_id }}" />
|
||||
<button
|
||||
class="icon-btn icon-abort"
|
||||
title="Stoppen (abort)"
|
||||
{% if b.status not in ["processing","downloading","parsing","audio"] %}
|
||||
disabled
|
||||
{% endif %}
|
||||
>
|
||||
<i class="fa-solid fa-stop"></i>
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
</div> <!-- einde .book-meta -->
|
||||
|
||||
</div> <!-- einde .book-card -->
|
||||
@ -0,0 +1,21 @@
|
||||
{# ============================================================ File:
|
||||
templates/components/registered_books.html Purpose: Toon een grid van
|
||||
geregistreerde boeken. Elke kaart wordt gerenderd via bookcard.html.
|
||||
============================================================ #}
|
||||
|
||||
<section class="dashboard-section">
|
||||
<h2>Geregistreerde boeken</h2>
|
||||
|
||||
{% if registered and registered|length > 0 %}
|
||||
|
||||
<div class="registered-grid">
|
||||
{% for b in registered %} {% include "components/bookcard.html" %} {% endfor
|
||||
%}
|
||||
</div>
|
||||
|
||||
{% else %}
|
||||
|
||||
<p>Geen geregistreerde boeken.</p>
|
||||
|
||||
{% endif %}
|
||||
</section>
|
||||
@ -0,0 +1,88 @@
|
||||
{% extends "layout.html" %} {% block content %}
|
||||
|
||||
<h1>State Inspection (SQL vs Redis)</h1>
|
||||
|
||||
<style>
|
||||
.state-card {
|
||||
border: 1px solid #444;
|
||||
padding: 18px;
|
||||
margin-bottom: 30px;
|
||||
background: #222;
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.state-title {
|
||||
font-size: 1.4em;
|
||||
margin-bottom: 14px;
|
||||
color: #9cf;
|
||||
}
|
||||
|
||||
table.state-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
|
||||
.state-table th,
|
||||
.state-table td {
|
||||
border: 1px solid #555;
|
||||
padding: 6px 10px;
|
||||
}
|
||||
|
||||
.state-table th {
|
||||
background: #333;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.same {
|
||||
color: #9f9;
|
||||
}
|
||||
|
||||
.diff {
|
||||
color: #ff7b7b;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.empty {
|
||||
color: #aaa;
|
||||
font-style: italic;
|
||||
}
|
||||
</style>
|
||||
|
||||
{% macro cmp(sqlval, redisval) %} {% if (sqlval|string) == (redisval|string) %}
|
||||
<td class="same">{{ sqlval }}</td>
|
||||
<td class="same">{{ redisval }}</td>
|
||||
{% else %}
|
||||
<td class="diff">{{ sqlval }}</td>
|
||||
<td class="diff">{{ redisval }}</td>
|
||||
{% endif %} {% endmacro %} {% for entry in results %}
|
||||
<div class="state-card">
|
||||
<div class="state-title">📘 {{ entry.book_id }}</div>
|
||||
|
||||
{% set sql = entry.sqlite %} {% set redis = entry.redis %} {% set merged =
|
||||
entry.would_merge_to %}
|
||||
|
||||
<table class="state-table">
|
||||
<tr>
|
||||
<th>Field</th>
|
||||
<th>SQLite</th>
|
||||
<th>Redis</th>
|
||||
<th>Merged Result</th>
|
||||
</tr>
|
||||
|
||||
{% for field in [ "status", "chapters_total", "downloaded",
|
||||
"chapters_download_done", "chapters_download_skipped", "parsed",
|
||||
"chapters_parsed_done", "audio_done", "audio_skipped", "last_update" ] %}
|
||||
|
||||
<tr>
|
||||
<th>{{ field }}</th>
|
||||
|
||||
<td>{{ sql.get(field, '') }}</td>
|
||||
<td>{{ redis.get(field, '') }}</td>
|
||||
<td>{{ merged.get(field, '') }}</td>
|
||||
</tr>
|
||||
|
||||
{% endfor %}
|
||||
</table>
|
||||
</div>
|
||||
{% endfor %} {% endblock %}
|
||||
@ -0,0 +1,91 @@
|
||||
{% extends "layout.html" %} {% block content %}
|
||||
<h1>Celery Queue Debug</h1>
|
||||
|
||||
<style>
|
||||
.debug-section {
|
||||
margin-bottom: 40px;
|
||||
}
|
||||
.debug-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.debug-table th,
|
||||
.debug-table td {
|
||||
border: 1px solid #444;
|
||||
padding: 6px 10px;
|
||||
}
|
||||
.debug-table th {
|
||||
background: #333;
|
||||
color: #fff;
|
||||
}
|
||||
pre {
|
||||
background: #1e1e1e;
|
||||
color: #ddd;
|
||||
padding: 10px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
code {
|
||||
color: #9cf;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="debug-section">
|
||||
<h2>Workers</h2>
|
||||
|
||||
<h3>Active Tasks</h3>
|
||||
<pre>{{ workers_active | tojson(indent=2) }}</pre>
|
||||
|
||||
<h3>Reserved</h3>
|
||||
<pre>{{ workers_reserved | tojson(indent=2) }}</pre>
|
||||
|
||||
<h3>Scheduled</h3>
|
||||
<pre>{{ workers_scheduled | tojson(indent=2) }}</pre>
|
||||
</div>
|
||||
|
||||
<hr />
|
||||
|
||||
<div class="debug-section">
|
||||
<h2>Queues</h2>
|
||||
|
||||
{% for q in queues %}
|
||||
<div class="debug-queue">
|
||||
<h3>{{ q.name }} ({{ q.length }} items)</h3>
|
||||
|
||||
<table class="debug-table">
|
||||
<tr>
|
||||
<th>Redis Key</th>
|
||||
<td>{{ q.redis_key }}</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<th>Length</th>
|
||||
<td>{{ q.length }}</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<th>Items (first 30)</th>
|
||||
<td>
|
||||
{% if q["items"] %}
|
||||
<ul style="margin: 0; padding-left: 20px">
|
||||
{% for item in q["items"] %}
|
||||
<li><code>{{ item | e }}</code></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<i>No items</i>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
setInterval(() => {
|
||||
window.location.reload();
|
||||
}, 5000);
|
||||
</script>
|
||||
|
||||
{% endblock %}
|
||||
@ -1,34 +1,53 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="nl">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>BookScraper</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; padding: 40px; max-width: 600px; margin: auto; }
|
||||
h1 { margin-bottom: 20px; }
|
||||
input[type="text"] {
|
||||
width: 100%; padding: 12px; font-size: 16px;
|
||||
border: 1px solid #ccc; border-radius: 6px;
|
||||
}
|
||||
button {
|
||||
margin-top: 20px;
|
||||
padding: 12px 20px;
|
||||
background: #007bff; color: white;
|
||||
border: none; border-radius: 6px;
|
||||
font-size: 16px; cursor: pointer;
|
||||
}
|
||||
button:hover { background: #0056b3; }
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
padding: 40px;
|
||||
max-width: 600px;
|
||||
margin: auto;
|
||||
}
|
||||
h1 {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
input[type="text"] {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
font-size: 16px;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 6px;
|
||||
}
|
||||
button {
|
||||
margin-top: 20px;
|
||||
padding: 12px 20px;
|
||||
background: #007bff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 16px;
|
||||
cursor: pointer;
|
||||
}
|
||||
button:hover {
|
||||
background: #0056b3;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
</head>
|
||||
<body>
|
||||
<h1>BookScraper WebGUI</h1>
|
||||
|
||||
<h1>BookScraper WebGUI</h1>
|
||||
|
||||
<form action="/start" method="POST">
|
||||
<label for="url">Geef een boek-URL op:</label><br><br>
|
||||
<input type="text" id="url" name="url" placeholder="https://example.com/book/12345" required>
|
||||
<button type="submit">Start Scraping</button>
|
||||
</form>
|
||||
|
||||
</body>
|
||||
<form action="/init" method="POST">
|
||||
<label for="url">Geef een boek-URL op:</label><br /><br />
|
||||
<input
|
||||
type="text"
|
||||
id="url"
|
||||
name="url"
|
||||
placeholder="https://example.com/book/12345"
|
||||
required
|
||||
/>
|
||||
<button type="submit">Start Scraping</button>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
Loading…
Reference in new issue