You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/app.py

143 lines
4.2 KiB

# ============================================
# File: bookscraper/app.py (ASYNC SCRAPING)
# ============================================
# Load variables from a .env file into the process environment first.
# NOTE(review): presumably this has to run before celery_app is imported so
# that its configuration can read those variables — confirm.
from dotenv import load_dotenv
load_dotenv()
# Trace line printed right before the Celery application is imported.
print(">>> [WEB] Importing celery_app …")
from celery_app import celery_app
from flask import Flask, render_template, request, jsonify
from scraper.logger import log_debug
# Abort + progress helpers (keyed per book_id)
from scraper.abort import set_abort
from scraper.progress import get_progress
# UI logs (global — not keyed by book_id)
from scraper.ui_log import get_ui_logs, reset_ui_logs # <-- ADDED
from celery.result import AsyncResult
# ⬇⬇⬇ ADDED for cover serving
from flask import send_from_directory
import os
# Flask application object on which the route decorators below register.
app = Flask(__name__)
# =====================================================
# STATIC FILE SERVING FOR OUTPUT ← ADDED
# =====================================================
# Root directory for files served under /output/; taken from the
# BOOKSCRAPER_OUTPUT_DIR environment variable, falling back to "output".
OUTPUT_ROOT = os.environ.get("BOOKSCRAPER_OUTPUT_DIR", "output")


@app.route("/output/<path:filename>")
def serve_output(filename):
    """Send a file located under OUTPUT_ROOT (e.g. cover.jpg, volumes).

    Args:
        filename: Path below OUTPUT_ROOT, taken from the URL.

    Returns:
        The response built by ``send_from_directory``; the file is sent
        inline rather than as an attachment.
    """
    response = send_from_directory(OUTPUT_ROOT, filename, as_attachment=False)
    return response
# =====================================================
# HOME PAGE
# =====================================================
@app.route("/", methods=["GET"])
def index():
    """Render the home page from the ``index.html`` template."""
    page = render_template("index.html")
    return page
# =====================================================
# START SCRAPING (async via Celery)
# =====================================================
@app.route("/start", methods=["POST"])
def start_scraping():
    """Queue a scrape task for the submitted URL and render the result page.

    Reads the ``url`` form field. When it is missing or blank, the result
    page is rendered with an error and nothing is queued. Otherwise the UI
    log buffer is reset, the task is sent to the ``scraping`` queue, and the
    result page is rendered with the id of the queued task.

    Returns:
        The rendered ``result.html`` template.
    """
    target = request.form.get("url", "")
    target = target.strip()
    if not target:
        return render_template("result.html", error="Geen URL opgegeven.")

    # Start every new scrape with an empty UI log buffer.
    reset_ui_logs()
    log_debug(f"[WEB] Scraping via Celery: {target}")

    # The task is addressed by name, so the task module is not imported here.
    task = celery_app.send_task(
        "scraper.tasks.scraping.start_scrape_book",
        args=[target],
        queue="scraping",
    )

    context = {
        "message": "Scraping gestart.",
        "scraping_task_id": task.id,
        "book_title": None,
    }
    return render_template("result.html", **context)
# =====================================================
# CLEAR UI LOGS MANUALLY (NEW)
# =====================================================
@app.route("/clear-logs", methods=["POST"])
def clear_logs():
    """Reset the global UI log buffer and acknowledge with a JSON body."""
    reset_ui_logs()
    return jsonify(status="ok", message="UI logs cleared")
# =====================================================
# ABORT (per book_id)
# =====================================================
@app.route("/abort/<book_id>", methods=["POST"])
def abort_download(book_id):
    """Flag the given book as aborted and echo its id back as JSON.

    Args:
        book_id: Book identifier taken from the URL.
    """
    log_debug(f"[WEB] Abort requested for book: {book_id}")
    set_abort(book_id)
    return jsonify(status="ok", aborted=book_id)
# =====================================================
# PROGRESS (per book_id)
# =====================================================
@app.route("/progress/<book_id>", methods=["GET"])
def progress(book_id):
    """Return whatever ``get_progress`` reports for ``book_id`` as JSON.

    Args:
        book_id: Book identifier taken from the URL.
    """
    snapshot = get_progress(book_id)
    return jsonify(snapshot)
# =====================================================
# LOGS — GLOBAL UI LOGS
# =====================================================
@app.route("/logs", methods=["GET"])
def logs():
    """Return the global UI log buffer as JSON under the ``logs`` key."""
    entries = get_ui_logs()
    return jsonify(logs=entries)
# =====================================================
# CELERY RESULT → return book_id when scraping finishes
# =====================================================
@app.route("/celery-result/<task_id>", methods=["GET"])
def celery_result(task_id):
    """Report whether a Celery task has finished and, if so, its outcome.

    Args:
        task_id: Celery task id taken from the URL.

    Returns:
        A JSON response with one of these shapes:

        * ``{"ready": False}`` while the task is in a non-terminal state;
        * ``{"ready": True, "result": <task return value>}`` on success;
        * ``{"ready": True, "error": "failed"}`` for any other terminal
          state.
    """
    result = AsyncResult(task_id, app=celery_app)

    # ready() is true for every terminal state, while successful()/failed()
    # only match SUCCESS and FAILURE. Checking it first ensures a task that
    # ended in another terminal state (e.g. REVOKED) is reported as finished
    # instead of leaving the client polling forever.
    if not result.ready():
        return jsonify({"ready": False})

    if result.successful():
        return jsonify({"ready": True, "result": result.get()})

    return jsonify({"ready": True, "error": "failed"})
# =====================================================
# RUN FLASK
# =====================================================
if __name__ == "__main__":
    # Server settings come from the environment, with defaults for each:
    # PORT (5000), HOST (0.0.0.0) and FLASK_DEBUG (enabled only when "1").
    port = int(os.environ.get("PORT", "5000"))
    host = os.environ.get("HOST", "0.0.0.0")
    debug = os.environ.get("FLASK_DEBUG", "0") == "1"

    log_debug(f"[WEB] Starting Flask server on {host}:{port}, debug={debug}")
    app.run(host=host, port=port, debug=debug)