# ============================================
# File: bookscraper/app.py (ASYNC SCRAPING)
# ============================================
from dotenv import load_dotenv

# Load .env before any project import that reads environment variables.
load_dotenv()

print(">>> [WEB] Importing celery_app …")
from celery_app import celery_app

from flask import Flask, render_template, request, jsonify, send_from_directory
from celery.result import AsyncResult
import os

from scraper.logger import log_debug

# Abort + Progress (per book_id)
from scraper.abort import set_abort
from scraper.progress import get_progress

# UI LOGS (GLOBAL — no book_id)
from scraper.ui_log import get_ui_logs, reset_ui_logs

app = Flask(__name__)

# =====================================================
# STATIC FILE SERVING FOR OUTPUT
# =====================================================
OUTPUT_ROOT = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "output")


# NOTE(review): the original route rule was "/output/" with no URL variable,
# while the view takes `filename` — every request would fail with a missing
# positional argument. Restored the `<path:filename>` converter (path, not
# string, so nested volume paths like "book/vol1/cover.jpg" still match).
@app.route("/output/<path:filename>")
def serve_output(filename):
    """Serve output files such as cover.jpg and volumes."""
    return send_from_directory(OUTPUT_ROOT, filename, as_attachment=False)


# =====================================================
# HOME PAGE
# =====================================================
@app.route("/", methods=["GET"])
def index():
    """Render the landing page."""
    return render_template("index.html")


# =====================================================
# START SCRAPING (async via Celery)
# =====================================================
@app.route("/start", methods=["POST"])
def start_scraping():
    """Kick off a scrape job on the Celery 'scraping' queue.

    Reads the target URL from the POSTed form, clears the global UI log
    buffer, and returns the result page carrying the Celery task id so the
    frontend can poll /celery-result/<task_id>.
    """
    url = request.form.get("url", "").strip()
    if not url:
        return render_template("result.html", error="Geen URL opgegeven.")

    # Clear UI log buffer when starting a new scrape.
    reset_ui_logs()

    log_debug(f"[WEB] Scraping via Celery: {url}")

    async_result = celery_app.send_task(
        "scraper.tasks.scraping.start_scrape_book",
        args=[url],
        queue="scraping",
    )

    return render_template(
        "result.html",
        message="Scraping gestart.",
        scraping_task_id=async_result.id,
        book_title=None,
    )


# =====================================================
# CLEAR UI LOGS MANUALLY
# =====================================================
@app.route("/clear-logs", methods=["POST"])
def clear_logs():
    """Empty the global UI log buffer on user request."""
    reset_ui_logs()
    return jsonify({"status": "ok", "message": "UI logs cleared"})


# =====================================================
# ABORT (per book_id)
# =====================================================
# NOTE(review): route rule restored to include <book_id> — the bare "/abort/"
# rule could never supply the view's required argument.
@app.route("/abort/<book_id>", methods=["POST"])
def abort_download(book_id):
    """Set the abort flag for a running scrape identified by book_id."""
    log_debug(f"[WEB] Abort requested for book: {book_id}")
    set_abort(book_id)
    return jsonify({"status": "ok", "aborted": book_id})


# =====================================================
# PROGRESS (per book_id)
# =====================================================
@app.route("/progress/<book_id>", methods=["GET"])
def progress(book_id):
    """Return the current progress state for one book as JSON."""
    return jsonify(get_progress(book_id))


# =====================================================
# LOGS — GLOBAL UI LOGS
# =====================================================
@app.route("/logs", methods=["GET"])
def logs():
    """Return the global UI log buffer as JSON."""
    return jsonify({"logs": get_ui_logs()})


# =====================================================
# CELERY RESULT → return book_id when scraping finishes
# =====================================================
@app.route("/celery-result/<task_id>", methods=["GET"])
def celery_result(task_id):
    """Poll endpoint: report whether the Celery task finished and its result."""
    result = AsyncResult(task_id, app=celery_app)
    if result.successful():
        # Task is known-successful, so .get() returns immediately.
        return jsonify({"ready": True, "result": result.get()})
    if result.failed():
        return jsonify({"ready": True, "error": "failed"})
    return jsonify({"ready": False})


# =====================================================
# RUN FLASK
# =====================================================
if __name__ == "__main__":
    debug = os.getenv("FLASK_DEBUG", "0") == "1"
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "5000"))
    log_debug(f"[WEB] Starting Flask server on {host}:{port}, debug={debug}")
    app.run(host=host, port=port, debug=debug)