You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
2.4 KiB
88 lines
2.4 KiB
# ============================================
|
|
# File: bookscraper/app.py (OPTION A — Sync Scraping)
|
|
# ============================================
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
# Runs before the project imports that follow — presumably so values from a
# .env file are already in the environment when those modules load (confirm).
load_dotenv()
|
|
|
|
# Trace line written to stdout at import time, just before celery_app is imported.
print(">>> [WEB] Importing celery_app …")
|
|
from celery_app import celery_app
|
|
|
|
from scraper.logger import log_debug
|
|
from flask import Flask, render_template, request
|
|
|
|
# Import SCRAPER (sync)
|
|
from scraper.book_scraper import BookScraper
|
|
from scraper.sites import BookSite
|
|
|
|
# Import Download Controller
|
|
from scraper.download_controller import DownloadController
|
|
|
|
# Flask application object; the @app.route decorators below register views on it.
app = Flask(__name__)
|
|
|
|
|
|
@app.route("/", methods=["GET"])
def index():
    """Serve the landing page by rendering ``index.html``."""
    landing_page = render_template("index.html")
    return landing_page
|
|
|
|
|
|
@app.route("/start", methods=["POST"])
def start_scraping():
    """Handle a POST to ``/start``: scrape in-process, then start the download.

    Reads the ``url`` form field and strips surrounding whitespace. If
    nothing is left, ``result.html`` is rendered with an error message.
    Otherwise the scrape runs synchronously inside this request, its result
    is handed to a ``DownloadController``, and ``result.html`` is rendered
    with the scrape result (``book``) and the ``id`` of the object returned
    by ``controller.start()`` (``download_job_id``).
    """
    submitted = request.form.get("url", "")
    target_url = submitted.strip()

    # Guard clause: nothing to scrape without a URL.
    if not target_url:
        return render_template("result.html", error="Geen URL opgegeven.")

    log_debug(f"[WEB] Sync scraping for: {target_url}")

    # -----------------------------------------------
    # 1. Scrape directly in this request (NOT via Celery).
    #    Per the original note, the result carries metadata + chapter list.
    # -----------------------------------------------
    book_info = BookScraper(BookSite(), target_url).execute()

    # -----------------------------------------------
    # 2. Start the download pipeline via Celery.
    # -----------------------------------------------
    download_job = DownloadController(book_info).start()

    # -----------------------------------------------
    # 3. Render the template (fully populated).
    # -----------------------------------------------
    template_context = {
        "book": book_info,
        "download_job_id": download_job.id,
    }
    return render_template("result.html", **template_context)
|
|
|
|
|
|
# ABORT ROUTE (unchanged)
|
|
from scraper.abort import set_abort, clear_abort
|
|
|
|
|
|
@app.route("/abort/<book_id>", methods=["POST"])
def abort_download(book_id):
    """Flag ``book_id`` for abort and render a confirmation page.

    Logs the request, calls ``set_abort(book_id)``, and renders
    ``result.html`` with ``aborted=True``, a placeholder book dict whose
    title is ``book_id`` (empty author, no chapters), and a message that
    echoes the id.
    """
    log_debug(f"[WEB] Abort requested for book: {book_id}")
    set_abort(book_id)

    # No scrape data is available here, so the template gets a stub book.
    placeholder_book = {"title": book_id, "author": "", "chapters": []}
    confirmation = f"Abort requested voor boek: {book_id}"

    return render_template(
        "result.html",
        aborted=True,
        book=placeholder_book,
        message=confirmation,
    )
|
|
|
|
|
|
if __name__ == "__main__":
    import os

    # Server settings are read from the environment, with fallbacks:
    # FLASK_DEBUG must be exactly "1" to enable debug mode; HOST and PORT
    # fall back to "0.0.0.0" and 5000.
    debug = os.environ.get("FLASK_DEBUG", "0") == "1"
    host = os.environ.get("HOST", "0.0.0.0")
    port = int(os.environ.get("PORT", "5000"))

    log_debug(f"[WEB] Starting Flask server on {host}:{port}, debug={debug}")
    app.run(host=host, port=port, debug=debug)
|