|
|
|
|
@ -1,41 +1,36 @@
|
|
|
|
|
# ============================================
|
|
|
|
|
# File: bookscraper/app.py (ASYNC SCRAPING)
|
|
|
|
|
# ============================================
|
|
|
|
|
|
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
load_dotenv()
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
import redis
|
|
|
|
|
from flask import Flask, render_template, request, jsonify, send_from_directory
|
|
|
|
|
|
|
|
|
|
print(">>> [WEB] Importing celery_app …")
|
|
|
|
|
from celery_app import celery_app
|
|
|
|
|
from db.db import init_db
|
|
|
|
|
from celery.result import AsyncResult
|
|
|
|
|
|
|
|
|
|
init_db() # ensure DB schema exists before Flask starts
|
|
|
|
|
|
|
|
|
|
from flask import Flask, render_template, request, jsonify
|
|
|
|
|
from scraper.logger import log_debug
|
|
|
|
|
|
|
|
|
|
# Abort + Progress (per book_id)
|
|
|
|
|
from scraper.abort import set_abort
|
|
|
|
|
from scraper.progress import get_progress
|
|
|
|
|
|
|
|
|
|
# UI LOGS (GLOBAL — no book_id)
|
|
|
|
|
from scraper.ui_log import get_ui_logs, reset_ui_logs
|
|
|
|
|
|
|
|
|
|
from celery.result import AsyncResult
|
|
|
|
|
from scraper.ui_log import get_ui_logs, reset_ui_logs, get_ui_logs_delta
|
|
|
|
|
from scraper.state import state as r
|
|
|
|
|
|
|
|
|
|
# Cover serving
|
|
|
|
|
from flask import send_from_directory
|
|
|
|
|
import os
|
|
|
|
|
from scraper.services.init_service import InitService
|
|
|
|
|
|
|
|
|
|
import redis
|
|
|
|
|
from db.repository import get_registered_books
|
|
|
|
|
|
|
|
|
|
# INIT DB
|
|
|
|
|
init_db()
|
|
|
|
|
|
|
|
|
|
# Flask
|
|
|
|
|
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# STATIC FILE SERVING FOR OUTPUT
|
|
|
|
|
# STATIC FILE SERVING
|
|
|
|
|
# =====================================================
|
|
|
|
|
OUTPUT_ROOT = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")
|
|
|
|
|
|
|
|
|
|
@ -46,26 +41,96 @@ def serve_output(filename):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# HOME PAGE
|
|
|
|
|
# SECTION 1 — NAVIGATION / HTML PAGES
|
|
|
|
|
# =====================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/", methods=["GET"])
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/dashboard", methods=["GET"])
def dashboard():
    """Render the dashboard page.

    The template receives the active books, the registered books and the
    current UI log lines. When ``get_ui_logs()`` returns nothing truthy an
    empty list is passed instead.
    """
    ui_lines = get_ui_logs()
    if not ui_lines:
        ui_lines = []

    template_args = {
        "books": list_active_books(),  # Redis
        "registered": get_registered_books(),  # SQLite INIT results
        "logs": ui_lines,
    }
    return render_template("dashboard/dashboard.html", **template_args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/book/<book_id>")
def book_detail(book_id):
    """Render the detail page for a single book.

    The title is looked up under the ``book:<book_id>:title`` key via ``r``;
    when that lookup yields nothing truthy, ``book_id`` itself is used as
    the title.
    """
    stored_title = r.get(f"book:{book_id}:title")
    page_context = {
        "book_id": book_id,
        "title": stored_title if stored_title else book_id,
        "logs": get_ui_logs(),
    }
    return render_template("dashboard/book_detail.html", **page_context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# START SCRAPING (async via Celery)
|
|
|
|
|
# SECTION 2 — ACTION ROUTES (INIT, START, ABORT)
|
|
|
|
|
# =====================================================
|
|
|
|
|
|
|
|
|
|
# CORRECT PATH — services/ is root-level
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _render_init_dashboard(**extra):
    """Render the dashboard template with current book/log data plus *extra*."""
    return render_template(
        "dashboard/dashboard.html",
        **extra,
        books=list_active_books(),  # Redis
        registered=get_registered_books(),  # SQLite INIT results
        logs=get_ui_logs(),
    )


@app.route("/init", methods=["POST"])
def init_book():
    """Register a book from a submitted URL (INIT flow).

    Reads the ``url`` form field, passes it to ``InitService.execute`` and
    re-renders the dashboard HTML (not JSON) with either a confirmation
    message or an error message.

    NOTE(review): the original notes describe the service as doing a
    lightweight metadata fetch and inserting the book into SQLite as
    'registered'; that happens outside this function — confirm there.
    """
    url = request.form.get("url", "").strip()
    if not url:
        return _render_init_dashboard(error="Geen URL opgegeven.")

    try:
        outcome = InitService.execute(url)
        notice = f"Boek geregistreerd: {outcome.get('title')}"
        return _render_init_dashboard(message=notice)
    except Exception as exc:
        # Any failure is logged and shown on the dashboard instead of
        # surfacing as a server error.
        log_debug(f"[INIT] ERROR: {exc}")
        return _render_init_dashboard(error=f"INIT mislukt: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/start", methods=["POST"])
|
|
|
|
|
def start_scraping():
|
|
|
|
|
url = request.form.get("url", "").strip()
|
|
|
|
|
|
|
|
|
|
if not url:
|
|
|
|
|
# ★ FIX: dashboard moet altijd books + logs meekrijgen
|
|
|
|
|
return render_template(
|
|
|
|
|
"dashboard/dashboard.html",
|
|
|
|
|
error="Geen URL opgegeven.",
|
|
|
|
|
books=list_active_books(),
|
|
|
|
|
registered=get_registered_books(),
|
|
|
|
|
logs=get_ui_logs(),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
@ -78,27 +143,15 @@ def start_scraping():
|
|
|
|
|
queue="scraping",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# ★ FIX: direct dashboard tonen met actuele data
|
|
|
|
|
return render_template(
|
|
|
|
|
"dashboard/dashboard.html",
|
|
|
|
|
scraping_task_id=async_result.id,
|
|
|
|
|
books=list_active_books(),
|
|
|
|
|
registered=get_registered_books(),
|
|
|
|
|
logs=get_ui_logs(),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# CLEAR UI LOGS
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/clear-logs", methods=["POST"])
def clear_logs():
    """Call ``reset_ui_logs()`` and acknowledge with a JSON payload."""
    reset_ui_logs()
    acknowledgement = {"status": "ok", "message": "UI logs cleared"}
    return jsonify(acknowledgement)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# ABORT (per book_id)
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/abort/<book_id>", methods=["POST"])
|
|
|
|
|
def abort_download(book_id):
|
|
|
|
|
log_debug(f"[WEB] Abort requested for book: {book_id}")
|
|
|
|
|
@ -107,87 +160,10 @@ def abort_download(book_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# PROGRESS (per book_id)
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/progress/<book_id>", methods=["GET"])
def progress(book_id):
    """Return the result of ``get_progress(book_id)`` as a JSON response."""
    snapshot = get_progress(book_id)
    return jsonify(snapshot)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# CELERY RESULT → return book_id
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/celery-result/<task_id>", methods=["GET"])
def celery_result(task_id):
    """Report the state of a Celery task as JSON.

    Responds with ``{"ready": True, "result": ...}`` once the task has
    succeeded, ``{"ready": True, "error": "failed"}`` once it has failed,
    and ``{"ready": False}`` in every other state.
    """
    task = AsyncResult(task_id, app=celery_app)

    if task.successful():
        payload = {"ready": True, "result": task.get()}
    elif task.failed():
        payload = {"ready": True, "error": "failed"}
    else:
        payload = {"ready": False}

    return jsonify(payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# API: book status new model
|
|
|
|
|
# =====================================================
|
|
|
|
|
def getStatus(book_id):
    """Build the status summary of one book from its state hash.

    Reads the hash ``book:<book_id>:state`` via ``r.hgetall`` and returns a
    dict with the keys ``book_id``, ``title``, ``status``,
    ``download_done``, ``download_skipped``, ``download_total``,
    ``audio_done`` and ``audio_total``.

    A missing or empty ``status`` becomes ``"unknown"``, a missing or empty
    ``title`` falls back to ``book_id``, and every counter that is missing
    or empty is reported as ``0``.
    """
    state = r.hgetall(f"book:{book_id}:state")

    def counter(field):
        # ``or 0`` covers both an absent field and an empty stored value, so
        # all counters are parsed the same tolerant way (previously only
        # ``audio_done`` was, and the others raised ValueError on "").
        return int(state.get(field) or 0)

    dl_total = counter("chapters_total")

    return {
        "book_id": book_id,
        "title": state.get("title") or book_id,
        "status": state.get("status") or "unknown",
        "download_done": counter("chapters_download_done"),
        "download_skipped": counter("chapters_download_skipped"),
        "download_total": dl_total,
        "audio_done": counter("audio_done"),
        # No separate audio total is read; the chapter total is reused.
        "audio_total": dl_total,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# REDIS BACKEND — BOOK STATE MODEL
|
|
|
|
|
# SECTION 3 — API ROUTES (JSON)
|
|
|
|
|
# =====================================================
|
|
|
|
|
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
|
|
|
|
|
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_active_booksold():
    """Collect the status of every book that has a ``book:*:state`` key.

    Uses ``r.keys`` to find the keys, prints each extracted book id and
    returns the list of ``getStatus`` results.
    """
    collected = []
    for state_key in r.keys("book:*:state"):
        # The book id is the segment right after the first colon.
        book_id = state_key.split(":", 2)[1]
        print(book_id)
        collected.append(getStatus(book_id))
    return collected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_active_books():
    """Return the ``getStatus`` summary of every ``book:*:state`` key.

    Keys are walked with ``r.scan_iter``; the book id is the text between
    the first and the second colon of each key.
    """

    def book_id_of(state_key):
        start = state_key.find(":") + 1
        stop = state_key.find(":", start)
        return state_key[start:stop]

    matching_keys = r.scan_iter(match="book:*:state", count=1000)
    return [getStatus(book_id_of(state_key)) for state_key in matching_keys]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# API: list all active books
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/api/books")
def api_books():
    """Return the list produced by ``list_active_books()`` as JSON."""
    active_books = list_active_books()
    return jsonify(active_books)
|
|
|
|
|
@ -195,45 +171,50 @@ def api_books():
|
|
|
|
|
|
|
|
|
|
@app.route("/api/book/<book_id>/status")
def api_book_status(book_id):
    """Return the ``getStatus`` summary of one book as JSON."""
    summary = getStatus(book_id)
    return jsonify(summary)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# API: book logs
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/api/book/<book_id>/logs")
def api_book_logs(book_id):
    """Return all entries of the ``logs:<book_id>`` list as JSON.

    An empty list is returned when the lookup yields nothing truthy.
    """
    entries = r.lrange(f"logs:{book_id}", 0, -1)
    if not entries:
        entries = []
    return jsonify(entries)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# VIEW: DASHBOARD
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/dashboard")
def dashboard():
    """Render the dashboard with the active books and the UI log lines."""
    ui_lines = get_ui_logs()

    # The dashboard must always receive both books and logs.
    template_args = {
        "books": list_active_books(),
        # The template gets a list here, not a dict.
        "logs": ui_lines if ui_lines else [],
    }
    return render_template("dashboard/dashboard.html", **template_args)
|
|
|
|
|
@app.route("/progress/<book_id>")
def progress(book_id):
    """Look up ``get_progress(book_id)`` and send it back as JSON."""
    current = get_progress(book_id)
    return jsonify(current)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/celery-result/<task_id>")
def celery_result(task_id):
    """Return a JSON description of a Celery task's outcome.

    ``ready`` is ``True`` with a ``result`` once the task succeeded,
    ``True`` with ``error: "failed"`` once it failed, and ``False``
    otherwise.
    """
    outcome = AsyncResult(task_id, app=celery_app)

    if outcome.successful():
        body = {"ready": True, "result": outcome.get()}
    elif outcome.failed():
        body = {"ready": True, "error": "failed"}
    else:
        body = {"ready": False}

    return jsonify(body)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/clear-logs", methods=["POST"])
def clear_logs():
    """Invoke ``reset_ui_logs()`` and reply with a JSON confirmation."""
    reset_ui_logs()
    confirmation = {"status": "ok", "message": "UI logs cleared"}
    return jsonify(confirmation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/logs", methods=["GET"])
def logs():
    """Return the UI log delta for the index supplied by the client.

    The ``last_index`` query parameter is parsed as an integer; a missing
    or non-integer value falls back to ``-1``. The value is passed to
    ``get_ui_logs_delta`` and its two results are returned as JSON under
    the keys ``lines`` and ``total``.
    """
    try:
        last_index = int(request.args.get("last_index", -1))
    except (TypeError, ValueError):
        # Only a malformed query value should trigger the fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        last_index = -1

    new_lines, total = get_ui_logs_delta(last_index)
    return jsonify({"lines": new_lines, "total": total})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# VIEW: BOOK DETAIL PAGE
|
|
|
|
|
# SECTION 4 — DEBUG ROUTES
|
|
|
|
|
# =====================================================
|
|
|
|
|
@app.route("/book/<book_id>")
def book_detail(book_id):
    """Show the detail page of one book.

    Uses the value stored under ``book:<book_id>:title`` as the title and
    falls back to ``book_id`` when that value is missing or empty.
    """
    title = r.get(f"book:{book_id}:title")
    if not title:
        title = book_id

    ui_lines = get_ui_logs()
    return render_template(
        "dashboard/book_detail.html",
        book_id=book_id,
        title=title,
        logs=ui_lines,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/debug/redis-keys")
|
|
|
|
|
@ -254,37 +235,65 @@ def debug_redis_keys():
|
|
|
|
|
return jsonify(results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================
|
|
|
|
|
# Rolling log endpoint (no new file)
|
|
|
|
|
# ============================================================
|
|
|
|
|
|
|
|
|
|
from flask import jsonify, request
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# ROLLING LOG ENDPOINT — DELTA POLLING VIA ui_log
|
|
|
|
|
# DB DEBUG: LIST ALL BOOKS FROM SQLITE
|
|
|
|
|
# =====================================================
|
|
|
|
|
from scraper.ui_log import get_ui_logs_delta
|
|
|
|
|
from db.repository import fetch_all_books
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/logs", methods=["GET"])
|
|
|
|
|
def logs():
|
|
|
|
|
@app.route("/api/db/books")
|
|
|
|
|
def api_db_books():
|
|
|
|
|
"""
|
|
|
|
|
Delta log delivery for WebGUI.
|
|
|
|
|
Browser sends ?last_index=N, we return only new lines.
|
|
|
|
|
Return ALL books stored in SQLite — including INIT-only entries.
|
|
|
|
|
Useful to verify that /init wrote correct metadata.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
last_index = int(request.args.get("last_index", -1))
|
|
|
|
|
except:
|
|
|
|
|
last_index = -1
|
|
|
|
|
books = fetch_all_books()
|
|
|
|
|
return jsonify({"status": "ok", "books": books})
|
|
|
|
|
except Exception as e:
|
|
|
|
|
return jsonify({"status": "error", "message": str(e)}), 500
|
|
|
|
|
|
|
|
|
|
new_lines, total = get_ui_logs_delta(last_index)
|
|
|
|
|
|
|
|
|
|
return jsonify({"lines": new_lines, "total": total})
|
|
|
|
|
# =====================================================
|
|
|
|
|
# SECTION 5 — INTERNAL HELPERS
|
|
|
|
|
# =====================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getStatus(book_id):
    """Summarize the state of one book as a plain dict.

    The hash ``book:<book_id>:state`` is fetched via ``r.hgetall``. The
    returned dict carries ``book_id``, ``title``, ``status``,
    ``download_done``, ``download_skipped``, ``download_total``,
    ``audio_done`` and ``audio_total``.

    Fallbacks: ``status`` defaults to ``"unknown"``, ``title`` defaults to
    ``book_id``, and any counter that is missing or empty counts as ``0``.
    """
    state = r.hgetall(f"book:{book_id}:state")

    def as_count(field):
        # Treat an absent field and an empty stored value alike. Previously
        # only ``audio_done`` was guarded this way; the other counters
        # raised ValueError on an empty string.
        return int(state.get(field) or 0)

    chapters_total = as_count("chapters_total")

    return {
        "book_id": book_id,
        "title": state.get("title") or book_id,
        "status": state.get("status") or "unknown",
        "download_done": as_count("chapters_download_done"),
        "download_skipped": as_count("chapters_download_skipped"),
        "download_total": chapters_total,
        "audio_done": as_count("audio_done"),
        # The chapter total doubles as the audio total.
        "audio_total": chapters_total,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_active_books():
    """Gather ``getStatus`` results for all keys matching ``book:*:state``.

    The keys come from ``r.scan_iter``; each book id is the slice between
    the first two colons of the key.
    """
    summaries = []
    for state_key in r.scan_iter(match="book:*:state", count=1000):
        id_start = state_key.find(":") + 1
        id_end = state_key.find(":", id_start)
        summaries.append(getStatus(state_key[id_start:id_end]))
    return summaries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =====================================================
|
|
|
|
|
# RUN FLASK
|
|
|
|
|
# SECTION 6 — FLASK RUNNER
|
|
|
|
|
# =====================================================
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
debug = os.getenv("FLASK_DEBUG", "0") == "1"
|
|
|
|
|
host = os.getenv("HOST", "0.0.0.0")
|
|
|
|
|
|