# ============================================
# File: bookscraper/app.py (ASYNC SCRAPING)
# ============================================
"""Flask web front-end for the book scraper.

Enqueues scraping tasks on Celery, serves generated output files, and
exposes abort / progress / log / book-state endpoints backed by Redis.
"""
import os

from dotenv import load_dotenv

# Environment is loaded before the imports below; presumably so that
# celery_app and the scraper modules see the configured values.
load_dotenv()

print(">>> [WEB] Importing celery_app …")
from celery_app import celery_app

from flask import (
    Flask,
    jsonify,
    render_template,
    request,
    send_from_directory,  # Cover serving
)
from scraper.logger import log_debug

# Abort + Progress (per book_id)
from scraper.abort import set_abort
from scraper.progress import get_progress

# UI LOGS (GLOBAL — no book_id)
from scraper.ui_log import get_ui_logs, reset_ui_logs

from celery.result import AsyncResult

# NOTE: this name is rebound to a redis.Redis client further down; the import
# is kept only because removing it could change import-time behaviour.
from scraper.state import state as r

import redis

# Flask
app = Flask(__name__)


# =====================================================
# STATIC FILE SERVING FOR OUTPUT
# =====================================================
OUTPUT_ROOT = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")


@app.route("/output/<path:filename>")
def serve_output(filename):
    """Serve a file from OUTPUT_ROOT inline (not as a download).

    The ``path`` converter lets *filename* contain sub-directories.
    """
    return send_from_directory(OUTPUT_ROOT, filename, as_attachment=False)


# =====================================================
# HOME PAGE
# =====================================================
@app.route("/", methods=["GET"])
def index():
    """Render the landing page."""
    return render_template("index.html")


# =====================================================
# START SCRAPING (async via Celery)
# =====================================================
@app.route("/start", methods=["POST"])
def start_scraping():
    """Enqueue a scrape task for the submitted URL and render the dashboard.

    Reads the ``url`` form field. When it is empty the dashboard is rendered
    with an error message and no task is queued.
    """
    url = request.form.get("url", "").strip()

    if not url:
        # The dashboard template must always receive books + logs.
        return render_template(
            "dashboard/dashboard.html",
            error="Geen URL opgegeven.",
            books=list_active_books(),
            logs=get_ui_logs(),
        )

    reset_ui_logs()
    log_debug(f"[WEB] Scraping via Celery: {url}")

    async_result = celery_app.send_task(
        "scraper.tasks.scraping.start_scrape_book",
        args=[url],
        queue="scraping",
    )

    # Show the dashboard immediately with the current data.
    return render_template(
        "dashboard/dashboard.html",
        scraping_task_id=async_result.id,
        books=list_active_books(),
        logs=get_ui_logs(),
    )


# =====================================================
# CLEAR UI LOGS
# =====================================================
@app.route("/clear-logs", methods=["POST"])
def clear_logs():
    """Reset the global UI log buffer."""
    reset_ui_logs()
    return jsonify({"status": "ok", "message": "UI logs cleared"})


# =====================================================
# ABORT (per book_id)
# =====================================================
@app.route("/abort/<book_id>", methods=["POST"])
def abort_download(book_id):
    """Flag the given book as aborted via ``set_abort``."""
    log_debug(f"[WEB] Abort requested for book: {book_id}")
    set_abort(book_id)
    return jsonify({"status": "ok", "aborted": book_id})


# =====================================================
# PROGRESS (per book_id)
# =====================================================
@app.route("/progress/<book_id>", methods=["GET"])
def progress(book_id):
    """Return ``get_progress(book_id)`` as JSON."""
    return jsonify(get_progress(book_id))


# =====================================================
# LOGS — GLOBAL UI LOGS
# =====================================================
@app.route("/logs", methods=["GET"])
def logs():
    """Return the global UI logs as JSON."""
    return jsonify({"logs": get_ui_logs()})


# =====================================================
# CELERY RESULT → return book_id
# =====================================================
@app.route("/celery-result/<task_id>", methods=["GET"])
def celery_result(task_id):
    """Report whether a Celery task has finished.

    Returns ``{"ready": True, "result": ...}`` on success,
    ``{"ready": True, "error": "failed"}`` on failure, and
    ``{"ready": False}`` while the task is still pending/running.
    """
    result = AsyncResult(task_id, app=celery_app)

    if result.successful():
        return jsonify({"ready": True, "result": result.get()})
    if result.failed():
        return jsonify({"ready": True, "error": "failed"})
    return jsonify({"ready": False})


# =====================================================
# REDIS BACKEND — BOOK STATE MODEL
# =====================================================
REDIS_URL = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
# Rebinds ``r`` (previously imported from scraper.state) to a direct client
# that decodes responses to ``str``.
r = redis.Redis.from_url(REDIS_URL, decode_responses=True)


def _redis_int(key):
    """Return the integer stored at *key*, or 0 when the key is missing or empty."""
    return int(r.get(key) or 0)


def _book_progress(book_id):
    """Read the status and download/audio counters of one book from Redis."""
    download_total = _redis_int(f"book:{book_id}:download:total")
    return {
        "status": r.get(f"book:{book_id}:status") or "unknown",
        "download_done": _redis_int(f"book:{book_id}:download:done"),
        "download_total": download_total,
        "audio_done": _redis_int(f"book:{book_id}:audio:done"),
        # The audio total is taken from the download total.
        "audio_total": download_total,
    }


def list_active_books():
    """Return list of active books from Redis Book State Model.

    Every key matching ``book:*:status`` yields one entry containing the
    book id, its title (falling back to the id), its status and counters.
    """
    # SCAN iterates incrementally instead of blocking the server like KEYS;
    # it may yield a key more than once, hence the set. Sorting gives the
    # dashboard a stable order.
    book_ids = sorted(
        {
            key.split(":")[1]
            for key in r.scan_iter(match="book:*:status", count=200)
        }
    )

    return [
        {
            "book_id": book_id,
            "title": r.get(f"book:{book_id}:title") or book_id,
            **_book_progress(book_id),
        }
        for book_id in book_ids
    ]


# =====================================================
# API: list all active books
# =====================================================
@app.route("/api/books")
def api_books():
    """Return all active books as JSON."""
    return jsonify(list_active_books())


# =====================================================
# API: book status
# =====================================================
@app.route("/api/book/<book_id>/status")
def api_book_status(book_id):
    """Return the status and counters of one book as JSON."""
    return jsonify({"book_id": book_id, **_book_progress(book_id)})


# =====================================================
# API: book logs
# =====================================================
@app.route("/api/book/<book_id>/logs")
def api_book_logs(book_id):
    """Return the full ``logs:<book_id>`` Redis list as JSON."""
    entries = r.lrange(f"logs:{book_id}", 0, -1) or []
    return jsonify(entries)


# =====================================================
# VIEW: DASHBOARD
# =====================================================
@app.route("/dashboard")
def dashboard():
    """Render the dashboard with the active books and the global UI logs."""
    # The dashboard template must always receive books + logs.
    return render_template(
        "dashboard/dashboard.html",
        books=list_active_books(),
        logs=get_ui_logs(),
    )


# =====================================================
# VIEW: BOOK DETAIL PAGE
# =====================================================
@app.route("/book/<book_id>")
def book_detail(book_id):
    """Render the detail page of one book (title falls back to the id)."""
    title = r.get(f"book:{book_id}:title") or book_id
    return render_template(
        "dashboard/book_detail.html",
        book_id=book_id,
        title=title,
        logs=get_ui_logs(),
    )


@app.route("/debug/redis-keys")
def debug_redis_keys():
    """Dump every Redis key with its string value as JSON.

    Keys whose value cannot be read with GET are reported as an empty string.
    """
    # NOTE(review): this exposes the complete Redis contents without any
    # access control; confirm it is not reachable in production.
    results = {}
    for key in r.scan_iter(match="*", count=200):
        try:
            results[key] = r.get(key)
        except redis.RedisError:
            # GET fails on non-string keys (e.g. the log lists); best-effort.
            results[key] = ""
    return jsonify(results)


# =====================================================
# RUN FLASK
# =====================================================
if __name__ == "__main__":
    debug = os.getenv("FLASK_DEBUG", "0") == "1"
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "5000"))

    log_debug(f"[WEB] Starting Flask server on {host}:{port}, debug={debug}")
    app.run(host=host, port=port, debug=debug)