# ============================================
# File: bookscraper/app.py (ASYNC SCRAPING)
# ============================================
"""Flask front-end for the book scraper.

Serves the HTML dashboard, exposes JSON status/log endpoints backed by the
shared state store, and dispatches scraping jobs to Celery.
"""

from dotenv import load_dotenv

# Environment must be loaded before the project modules below are imported,
# so this call intentionally sits between the import statements.
load_dotenv()

import os

import redis
from flask import Flask, render_template, request, jsonify, send_from_directory

print(">>> [WEB] Importing celery_app …")
from celery_app import celery_app
from db.db import init_db
from celery.result import AsyncResult

from scraper.logger import log_debug
from scraper.abort import set_abort
from scraper.progress import get_progress
from scraper.ui_log import get_ui_logs, reset_ui_logs, get_ui_logs_delta
from scraper.state import state as r
from scraper.services.init_service import InitService
from db.repository import fetch_all_books, get_registered_books

# INIT DB
init_db()

app = Flask(__name__)

# =====================================================
# STATIC FILE SERVING
# =====================================================
OUTPUT_ROOT = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")


# NOTE(review): the URL placeholder was missing from this rule; a ``path``
# converter is assumed so that files in sub-directories can be addressed.
@app.route("/output/<path:filename>")
def serve_output(filename):
    """Serve a file from ``OUTPUT_ROOT`` inline (not as an attachment)."""
    return send_from_directory(OUTPUT_ROOT, filename, as_attachment=False)


# =====================================================
# SECTION 1 — NAVIGATION / HTML PAGES
# =====================================================
def _render_dashboard(**extra):
    """Render the dashboard template with the standard context.

    The context always contains ``books`` (active books from the state
    store), ``registered`` (books registered in the database) and ``logs``
    (UI log lines, never ``None``). ``extra`` adds page-specific values such
    as ``error``, ``message`` or ``scraping_task_id``.
    """
    return render_template(
        "dashboard/dashboard.html",
        books=list_active_books(),  # Redis
        registered=get_registered_books(),  # SQLite INIT results
        logs=get_ui_logs() or [],
        **extra,
    )


@app.route("/", methods=["GET"])
def index():
    """Render the landing page."""
    return render_template("index.html")


@app.route("/dashboard", methods=["GET"])
def dashboard():
    """Render the dashboard with active books, registered books and logs."""
    return _render_dashboard()


@app.route("/book/<book_id>")
def book_detail(book_id):
    """Render the detail page for one book.

    The title is read from the ``book:{book_id}:title`` key and falls back
    to the book id when no title is stored.
    """
    title = r.get(f"book:{book_id}:title") or book_id
    return render_template(
        "dashboard/book_detail.html",
        book_id=book_id,
        title=title,
        logs=get_ui_logs() or [],
    )


# =====================================================
# SECTION 2 — ACTION ROUTES (INIT, START, ABORT)
# =====================================================
# CORRECT PATH — services/ is root-level
@app.route("/init", methods=["POST"])
def init_book():
    """
    INIT-flow:
    - user enters URL
    - lightweight metadata fetch
    - insert into SQLite as 'registered'
    - return dashboard HTML (NOT JSON)
    """
    url = request.form.get("url", "").strip()
    if not url:
        return _render_dashboard(error="Geen URL opgegeven.")

    try:
        result = InitService.execute(url)
    except Exception as e:
        # Top-level boundary: report the failure in the UI instead of a 500.
        log_debug(f"[INIT] ERROR: {e}")
        return _render_dashboard(error=f"INIT mislukt: {e}")

    msg = f"Boek geregistreerd: {result.get('title')}"
    return _render_dashboard(message=msg)


@app.route("/start", methods=["POST"])
def start_scraping():
    """Queue a scrape task for the submitted URL and re-render the dashboard.

    The UI log buffer is reset before the task is sent; the Celery task id
    is passed to the template as ``scraping_task_id``.
    """
    url = request.form.get("url", "").strip()
    if not url:
        return _render_dashboard(error="Geen URL opgegeven.")

    reset_ui_logs()
    log_debug(f"[WEB] Scraping via Celery: {url}")

    async_result = celery_app.send_task(
        "scraper.tasks.scraping.start_scrape_book",
        args=[url],
        queue="scraping",
    )

    return _render_dashboard(scraping_task_id=async_result.id)


@app.route("/abort/<book_id>", methods=["POST"])
def abort_download(book_id):
    """Set the abort flag for ``book_id`` and acknowledge with JSON."""
    log_debug(f"[WEB] Abort requested for book: {book_id}")
    set_abort(book_id)
    return jsonify({"status": "ok", "aborted": book_id})


# =====================================================
# SECTION 3 — API ROUTES (JSON)
# =====================================================
@app.route("/api/books")
def api_books():
    """Return the status of every active book as a JSON list."""
    return jsonify(list_active_books())


@app.route("/api/book/<book_id>/status")
def api_book_status(book_id):
    """Return the status dict of a single book as JSON."""
    return jsonify(getStatus(book_id))


@app.route("/api/book/<book_id>/logs")
def api_book_logs(book_id):
    """Return all entries of the ``logs:{book_id}`` list as JSON."""
    logs = r.lrange(f"logs:{book_id}", 0, -1) or []
    return jsonify(logs)


@app.route("/progress/<book_id>")
def progress(book_id):
    """Return the progress data for ``book_id`` as JSON."""
    return jsonify(get_progress(book_id))


@app.route("/celery-result/<task_id>")
def celery_result(task_id):
    """Report whether a Celery task finished, and its result or failure."""
    result = AsyncResult(task_id, app=celery_app)
    if result.successful():
        return jsonify({"ready": True, "result": result.get()})
    if result.failed():
        return jsonify({"ready": True, "error": "failed"})
    return jsonify({"ready": False})


@app.route("/clear-logs", methods=["POST"])
def clear_logs():
    """Reset the UI log buffer."""
    reset_ui_logs()
    return jsonify({"status": "ok", "message": "UI logs cleared"})


@app.route("/logs", methods=["GET"])
def logs():
    """Return UI log lines added after ``last_index`` plus the total count.

    A missing or non-integer ``last_index`` query parameter is treated
    as ``-1``.
    """
    try:
        last_index = int(request.args.get("last_index", -1))
    except (TypeError, ValueError):
        last_index = -1

    new_lines, total = get_ui_logs_delta(last_index)
    return jsonify({"lines": new_lines, "total": total})


# =====================================================
# SECTION 4 — DEBUG ROUTES
# =====================================================
@app.route("/debug/redis-keys")
def debug_redis_keys():
    """Dump every key in the state store with its string value (debug only).

    Keys whose value cannot be read with ``get`` are reported with an empty
    string instead of aborting the whole dump.
    """
    cursor = 0
    results = {}
    while True:
        cursor, keys = r.scan(cursor, match="*", count=200)
        for k in keys:
            try:
                results[k] = r.get(k)
            except Exception:
                # Best-effort: e.g. non-string keys cannot be read with GET.
                results[k] = ""
        if cursor == 0:
            break
    return jsonify(results)


# =====================================================
# DB DEBUG: LIST ALL BOOKS FROM SQLITE
# =====================================================
@app.route("/api/db/books")
def api_db_books():
    """
    Return ALL books stored in SQLite — including INIT-only entries.
    Useful to verify that /init wrote correct metadata.
    """
    try:
        books = fetch_all_books()
        return jsonify({"status": "ok", "books": books})
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500


# =====================================================
# SECTION 5 — INTERNAL HELPERS
# =====================================================
def _int_field(mapping, key):
    """Return ``mapping[key]`` as an int, treating missing/empty as 0."""
    return int(mapping.get(key) or 0)


def getStatus(book_id):
    """Build the status dict for one book from ``book:{book_id}:state``.

    Missing counters default to 0, a missing status to ``"unknown"`` and a
    missing title to the book id. ``audio_total`` mirrors ``download_total``
    (both come from ``chapters_total``).
    """
    state = r.hgetall(f"book:{book_id}:state")
    dl_total = _int_field(state, "chapters_total")

    return {
        "book_id": book_id,
        "title": state.get("title") or book_id,
        "status": state.get("status") or "unknown",
        "download_done": _int_field(state, "chapters_download_done"),
        "download_skipped": _int_field(state, "chapters_download_skipped"),
        "download_total": dl_total,
        "audio_done": _int_field(state, "audio_done"),
        "audio_total": dl_total,
    }


def list_active_books():
    """Return the status dict of every book that has a ``book:*:state`` key."""
    books = []
    for key in r.scan_iter(match="book:*:state", count=1000):
        # The book id is the segment between the first and second colon.
        book_id = key.split(":", 2)[1]
        books.append(getStatus(book_id))
    return books


# =====================================================
# SECTION 6 — FLASK RUNNER
# =====================================================
if __name__ == "__main__":
    debug = os.getenv("FLASK_DEBUG", "0") == "1"
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "5000"))

    log_debug(f"[WEB] Starting Flask server on {host}:{port}, debug={debug}")
    app.run(host=host, port=port, debug=debug)