bookscraper new

celery-integration
peter.fong 2 weeks ago
parent da4d32bc72
commit 158cb63d54

@ -0,0 +1,53 @@
from flask import Flask, request, render_template_string
from scraper.book_scraper import BookScraper
from scraper.sites import BookSite
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
app = Flask(__name__)
# --- GET: show the form ---
@app.route("/", methods=["GET"])
def index():
    """Serve the landing page with the book-URL submission form."""
    form_page = """
<html>
<body>
<h2>BookScraper</h2>
<form method="post">
<label>Book URL:</label><br>
<input name="url" style="width:400px"><br>
<button type="submit">Scrape</button>
</form>
</body>
</html>
"""
    return render_template_string(form_page)
# --- POST: run the scraper ---
@app.route("/", methods=["POST"])
def run_scraper():
    """Scrape the submitted book URL and show its title and the debug log.

    Returns:
        The rendered result page, or a plain-text 400 response when the
        ``url`` form field is missing or is not an absolute http(s) URL.
    """
    # Local import keeps this handler self-contained.
    from urllib.parse import urlparse

    url = (request.form.get("url") or "").strip()
    parsed = urlparse(url)
    # Reject empty, relative or non-web input before any request is made.
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        return "Invalid or missing book URL (expected http:// or https://).", 400

    site = BookSite()
    scraper = BookScraper(site, url)
    result = scraper.execute()
    return render_template_string("""
<html>
<body>
<h2>Scrape result: {{title}}</h2>
<h3>Debug output:</h3>
<pre style='background:#111;color:#0f0;padding:10px;border-radius:8px'>
{{debug}}
</pre>
<p><a href="/">Terug</a></p>
</body>
</html>
""", title=result["title"], debug=result["debug"])
if __name__ == "__main__":
    # The Werkzeug debugger lets the browser execute code, so FLASK_DEBUG=0
    # can switch it off; the default keeps the original behaviour (on).
    app.run(debug=os.getenv("FLASK_DEBUG", "1") != "0")

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Scaffold the Flask BookScraper layout in the current directory.
# -e: stop at the first failing command, -u: unset variables are errors,
# pipefail: a pipeline fails when any command in it fails.
set -euo pipefail
echo "📂 Creating Flask BookScraper structure in current directory..."
# --- Create folders ---
mkdir -p scraper templates static
# --- Create empty files (touch never truncates an existing file) ---
touch app.py
touch scraper/__init__.py
touch scraper/scraper.py
# Modules the application code imports: scraper.book_scraper, scraper.logger
touch scraper/book_scraper.py
touch scraper/logger.py
touch scraper/sites.py
touch scraper/utils.py
touch templates/index.html
touch templates/result.html
touch static/.keep # empty placeholder to keep folder under git
# --- Optional: auto-create requirements file ---
# Quoted delimiter: the list is written literally, without shell expansion.
# python-dotenv is included because the scraper imports `dotenv`.
cat <<'EOF' > requirements.txt
flask
requests
beautifulsoup4
lxml
pillow
python-dotenv
EOF
echo "🎉 Structure created successfully!"
# Show structure
echo
if command -v tree >/dev/null 2>&1; then
    tree .
else
    echo "Install 'tree' for pretty output. Current structure:"
    ls -R .
fi

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

@ -0,0 +1,5 @@
flask
requests
beautifulsoup4
lxml
pillow
# needed for `from dotenv import load_dotenv` in the scraper module
python-dotenv

@ -0,0 +1,269 @@
import requests
import os
import time
from pathlib import Path
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from PIL import Image
from io import BytesIO
from dotenv import load_dotenv
from scraper.logger import setup_logger, LOG_BUFFER
from scraper.utils import clean_text, load_replacements
load_dotenv()
logger = setup_logger()
class Chapter:
    """One chapter of a book: sequence number, title, source URL and text."""

    def __init__(self, number, title, url):
        # The identifying data is supplied up front; the text starts empty.
        self.number, self.title, self.url = number, title, url
        self.text = ""
class BookScraper:
    """Scrape one book (metadata, cover, chapter texts) and write it to disk.

    Settings come from environment variables read in ``__init__``
    (``DRY_RUN``, ``TEST_CHAPTER_LIMIT``, ``MAX_VOL_SIZE``,
    ``MAX_DL_PER_SEC``) and ``OUTPUT_DIR`` when the output folder is created.
    """

    # Characters replaced when scraped text is used as a file/folder name.
    _UNSAFE_NAME_CHARS = '<>:"/\\|?*'

    def __init__(self, site, url):
        """Remember the site description and book URL and load the settings.

        Args:
            site: Object providing ``root``, ``name`` and ``encoding``
                (optionally ``chapter_list_selector``).
            url: Address of the book's main page.
        """
        self.site = site
        self.url = url
        self.book_title = ""
        self.book_author = ""
        self.book_description = ""
        self.cover_url = ""
        self.chapters = []
        self.chapter_base = None
        self.base_path = None
        # ENV settings
        self.DRY_RUN = os.getenv("DRY_RUN", "0") == "1"
        self.TEST_CHAPTER_LIMIT = int(os.getenv("TEST_CHAPTER_LIMIT", "10"))
        self.MAX_VOL_SIZE = int(os.getenv("MAX_VOL_SIZE", "1500"))
        self.MAX_DL_PER_SEC = int(os.getenv("MAX_DL_PER_SEC", "2"))
        # Load text replacements
        self.replacements = load_replacements("replacements.txt")

    # -----------------------------------------------------
    def execute(self):
        """Run the whole scrape and return the title plus captured debug log.

        Returns:
            dict with keys ``"title"`` and ``"debug"``.
        """
        # Start every run with an empty log buffer.
        LOG_BUFFER.seek(0)
        LOG_BUFFER.truncate(0)
        logger.debug("Starting scraper for %s", self.url)
        soup = self.get_document(self.url)
        self.parse_title(soup)
        self.parse_author(soup)
        self.parse_description(soup)
        self.parse_cover(soup)
        self.prepare_output_folder()
        chapter_page = self.get_chapter_page(soup)
        self.parse_chapter_links(chapter_page)
        if self.DRY_RUN:
            logger.debug(
                "DRY RUN → downloading only first %s chapters", self.TEST_CHAPTER_LIMIT)
            self.get_some_chapters(self.TEST_CHAPTER_LIMIT)
        else:
            self.get_all_chapters()
            self.split_into_volumes()
        return {
            "title": self.book_title,
            "debug": LOG_BUFFER.getvalue()
        }

    # -----------------------------------------------------
    # HELPERS
    # -----------------------------------------------------
    @staticmethod
    def _safe_name(name, fallback="untitled"):
        """Return ``name`` made safe for use as one path component.

        Path separators, reserved punctuation and control characters become
        ``_``; surrounding spaces and dots are removed so a scraped title can
        never resolve to ``.``/``..`` or escape the output folder.
        """
        cleaned = "".join(
            "_" if ch in BookScraper._UNSAFE_NAME_CHARS or ord(ch) < 32 else ch
            for ch in str(name)
        ).strip(" .")
        return cleaned or fallback

    def _chapter_filename(self, ch):
        """Build the ``00001_Title.txt`` style file name for a chapter."""
        return f"{ch.number:05d}_{self._safe_name(ch.title)}.txt"

    # -----------------------------------------------------
    # NETWORK
    # -----------------------------------------------------
    def get_document(self, url):
        """Fetch ``url`` (rate limited) and return it parsed with lxml."""
        logger.debug("GET %s", url)
        # Simple throttle: wait 1/MAX_DL_PER_SEC seconds before each request.
        time.sleep(1 / max(1, self.MAX_DL_PER_SEC))
        resp = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        resp.encoding = self.site.encoding
        logger.debug("HTTP %s for %s", resp.status_code, url)
        return BeautifulSoup(resp.text, "lxml")

    # -----------------------------------------------------
    # BASIC PARSERS (piaotia structure)
    # -----------------------------------------------------
    def parse_title(self, soup):
        """Take the book title from the first ``<h1>``."""
        h1 = soup.find("h1")
        self.book_title = h1.get_text(strip=True) if h1 else "UnknownTitle"
        logger.debug("Book title: %s", self.book_title)

    def parse_author(self, soup):
        """Take the author from the table cell carrying the author label."""
        # NOTE(review): the label characters were empty strings in the
        # original source (apparently lost to an encoding problem), which made
        # ``raw.split("", 1)`` raise ValueError. They are restored here on the
        # assumption that the page labels the author as "作 者：<name>" —
        # confirm against the live site.
        self.book_author = "UnknownAuthor"
        td = soup.find("td", string=lambda t: t and "作" in t and "者" in t)
        if td:
            raw = td.get_text(strip=True)
            for sep in ("：", ":"):
                if sep in raw:
                    self.book_author = raw.split(sep, 1)[1].strip() or "UnknownAuthor"
                    break
        logger.debug("Book author: %s", self.book_author)

    def parse_description(self, soup):
        """Collect the text following the "内容简介" span up to the next span."""
        span = soup.find("span", string=lambda t: t and "内容简介" in t)
        if not span:
            self.book_description = ""
            return
        parts = []
        for sib in span.next_siblings:
            if getattr(sib, "name", None) == "span":
                break
            txt = sib.strip() if isinstance(sib, str) else sib.get_text(strip=True)
            if txt:
                parts.append(txt)
        self.book_description = "\n".join(parts)
        logger.debug("Description parsed (%s chars)",
                     len(self.book_description))

    def parse_cover(self, soup):
        """Resolve the cover image URL from its fixed position in the page."""
        selector = (
            "html > body > div:nth-of-type(6) > div:nth-of-type(2) > div > table "
            "> tr:nth-of-type(4) > td:nth-of-type(1) > table > tr:nth-of-type(1) "
            "> td:nth-of-type(2) > a:nth-of-type(1) > img"
        )
        img = soup.select_one(selector)
        # An <img> without src would otherwise resolve to the site root.
        if img and img.get("src"):
            self.cover_url = urljoin(self.site.root, img.get("src"))
        else:
            logger.debug("Cover not found!")
        logger.debug("Cover URL = %s", self.cover_url)

    # -----------------------------------------------------
    def prepare_output_folder(self):
        """Create ``<OUTPUT_DIR>/<title>/<site name>`` and store the cover."""
        output_root = os.getenv("OUTPUT_DIR", "./output")
        # The title is remote content: sanitise it before using it as a path.
        self.base_path = (
            Path(output_root) / self._safe_name(self.book_title) / self.site.name
        )
        self.base_path.mkdir(parents=True, exist_ok=True)
        logger.debug("Output directory: %s", self.base_path)
        if self.cover_url:
            self.save_image(self.cover_url, self.base_path / "cover.jpg")

    def save_image(self, url, path):
        """Download the image at ``url`` and save it to ``path``.

        Best effort: a cover that cannot be fetched or decoded is logged and
        skipped so it does not abort the scrape.
        """
        logger.debug("Downloading cover: %s", url)
        try:
            resp = requests.get(
                url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
            if resp.status_code != 200:
                logger.debug("Cover download failed: HTTP %s", resp.status_code)
                return
            img = Image.open(BytesIO(resp.content))
            # The target is a .jpg file; modes with alpha or a palette
            # cannot be written as JPEG, so convert those first.
            if img.mode not in ("RGB", "L", "CMYK"):
                img = img.convert("RGB")
            img.save(path)
        except (requests.RequestException, OSError) as exc:
            logger.debug("Cover could not be saved: %s", exc)
            return
        logger.debug("Cover saved to %s", path)

    # -----------------------------------------------------
    # CHAPTER PAGE
    # -----------------------------------------------------
    def get_chapter_page(self, soup):
        """Follow the first link in the book table to the chapter index.

        Also records ``chapter_base``, the directory part of the index URL.

        Raises:
            ValueError: when the expected table or link is not in the page.
        """
        node = soup.select_one(
            "html > body > div:nth-of-type(6) > div:nth-of-type(2) > div > table")
        link = node.select_one("a") if node else None
        href = link.get("href") if link else None
        if not href:
            raise ValueError(f"No chapter index link found on {self.url}")
        chapter_url = urljoin(self.site.root, href)
        parsed = urlparse(chapter_url)
        base = parsed.path.rsplit("/", 1)[0] + "/"
        self.chapter_base = f"{parsed.scheme}://{parsed.netloc}{base}"
        logger.debug("Chapter index URL = %s", chapter_url)
        logger.debug("CHAPTER_BASE = %s", self.chapter_base)
        return self.get_document(chapter_url)

    def parse_chapter_links(self, soup):
        """Fill ``self.chapters`` from the ``.html`` links of the index page."""
        # Prefer the selector declared by the site object, if it has one.
        selector = getattr(self.site, "chapter_list_selector", None) or "div.centent"
        container = soup.select_one(selector)
        if container is None:
            logger.debug("Chapter list container %r not found", selector)
            links = []
        else:
            links = container.select("ul li a[href]")
        for i, a in enumerate(links, 1):
            href = a.get("href")
            if not href.endswith(".html"):
                continue
            abs_url = urljoin(self.chapter_base, href)
            title = a.get_text(strip=True)
            self.chapters.append(Chapter(i, title, abs_url))
        logger.debug("Total chapters: %s", len(self.chapters))

    # -----------------------------------------------------
    # DOWNLOAD CHAPTERS
    # -----------------------------------------------------
    def get_all_chapters(self):
        """Download the text of every chapter into ``ch.text``."""
        for ch in self.chapters:
            ch.text = self.fetch_chapter(ch)
            logger.debug("CH %s length = %s", ch.number, len(ch.text))

    def get_some_chapters(self, limit):
        """Download the first ``limit`` chapters and save each as a file."""
        for ch in self.chapters[:limit]:
            ch.text = self.fetch_chapter(ch)
            filename = self.base_path / self._chapter_filename(ch)
            filename.write_text(ch.text, encoding="utf-8")
            logger.debug("Saved test chapter: %s", filename)

    def fetch_chapter(self, ch):
        """Return the cleaned text of one chapter."""
        soup = self.get_document(ch.url)
        text = self.parse_chapter_text(soup)
        return clean_text(text, self.replacements)

    def parse_chapter_text(self, soup):
        """Extract the chapter body that follows the ``<h1>`` heading.

        Text is collected from the siblings after the heading, starting after
        the first ``<br>`` and stopping at the ``bottomlink`` element.
        Returns ``""`` when the page has no body or heading.
        """
        body = soup.body
        h1 = body.find("h1") if body else None
        if h1 is None:
            logger.debug("No <h1> found; chapter text is empty")
            return ""
        parts = []
        collecting = False
        for sib in h1.next_siblings:
            if getattr(sib, "get", None) and sib.get("class") == ["bottomlink"]:
                break
            if getattr(sib, "get", None) and sib.get("class") == ["toplink"]:
                continue
            if getattr(sib, "name", None) in ["script", "style"]:
                continue
            if not collecting:
                # Everything before the first <br> is skipped.
                if getattr(sib, "name", None) == "br":
                    collecting = True
                continue
            txt = sib.strip() if isinstance(sib, str) else sib.get_text("\n", strip=True)
            if txt:
                parts.append(txt)
        return "\n".join(parts).strip()

    # -----------------------------------------------------
    # SPLIT VOLUMES
    # -----------------------------------------------------
    def split_into_volumes(self):
        """Write the chapters into ``v1``, ``v2``, … folders of limited size."""
        # A size below 1 would never advance through the chapter list.
        step = max(1, self.MAX_VOL_SIZE)
        logger.debug(
            "Splitting into volumes (max %s chapters per volume)", step)
        for volume, start in enumerate(range(0, len(self.chapters), step), 1):
            chunk = self.chapters[start:start + step]
            volume_dir = self.base_path / f"v{volume}"
            volume_dir.mkdir(exist_ok=True)
            for ch in chunk:
                filename = volume_dir / self._chapter_filename(ch)
                filename.write_text(ch.text, encoding="utf-8")
            logger.debug("Volume %s saved (%s chapters)", volume, len(chunk))

@ -0,0 +1,27 @@
# scraper/logger.py
import logging
from io import StringIO
# In-memory buffer whose contents are returned to the web UI
LOG_BUFFER = StringIO()


def setup_logger():
    """Return the "bookscraper" logger wired to the console and LOG_BUFFER.

    Both handlers log at DEBUG level with the "[LEVEL] message" format.
    Existing handlers are dropped first so repeated calls do not stack up.
    """
    log = logging.getLogger("bookscraper")
    log.setLevel(logging.DEBUG)
    log.handlers = []  # prevent duplicate handlers on reload
    # Console handler first, then the buffer handler for the UI.
    for handler in (logging.StreamHandler(), logging.StreamHandler(LOG_BUFFER)):
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
        log.addHandler(handler)
    return log

@ -0,0 +1,11 @@
class BookSite:
    """Static settings for the piaotian site: name, root URL, selector,
    page encoding and site-specific text replacements."""

    def __init__(self):
        self.name = "piaotian"
        self.root = "https://www.ptwxz.com"
        self.chapter_list_selector = "div.centent"
        self.encoding = "gb2312"
        # (text to find, replacement) pairs, kept in this order.
        cleanup_rules = [
            ("&nbsp;&nbsp;", "\n"),
            ("手机用户请访问http://m.piaotian.net", ""),
            ("(新飘天文学www.piaotian.cc )", ""),
        ]
        self.replacements = dict(cleanup_rules)

@ -0,0 +1,22 @@
import os
# scraper/utils.py
def load_replacements(path):
    """Load text replacement rules from ``path``.

    Each rule is one line, either ``source=>target`` or ``source=target``
    (the first separator found splits the line; ``=>`` wins when present).
    Blank lines, lines starting with ``#`` and rules with an empty source are
    ignored. The escapes ``\\xNN``, ``\\uNNNN``, ``\\r``, ``\\n`` and ``\\t``
    are decoded so whitespace characters can be written visibly.

    Args:
        path: File to read; a falsy or non-existing path yields no rules.

    Returns:
        dict mapping source text to replacement text.
    """
    import re  # local import keeps this function self-contained

    repl = {}
    if not path or not os.path.exists(path):
        return repl

    escape = re.compile(r"\\(?:x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4})|([rnt]))")
    named = {"r": "\r", "n": "\n", "t": "\t"}

    def _decode(match):
        hex_digits = match.group(1) or match.group(2)
        if hex_digits:
            return chr(int(hex_digits, 16))
        return named[match.group(3)]

    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            sep = "=>" if "=>" in line else "="
            if sep not in line:
                continue
            k, v = line.split(sep, 1)
            # Strip first, decode afterwards: whitespace written as an escape
            # must survive the stripping.
            key = escape.sub(_decode, k.strip())
            if not key:
                # An empty source would match between every character.
                continue
            repl[key] = escape.sub(_decode, v.strip())
    return repl
def clean_text(text, repl_dict):
    """Apply every ``source -> target`` replacement of ``repl_dict`` to ``text``.

    Replacements run in the dict's iteration order. Rules with an empty
    source are skipped: ``str.replace("", x)`` would insert ``x`` between
    every character.
    """
    for src, tgt in repl_dict.items():
        if not src:
            continue
        text = text.replace(src, tgt)
    return text

@ -0,0 +1,22 @@
<!DOCTYPE html>
<html>
<head>
<title>Book Scraper</title>
</head>
<body>
<h1>Book Scraper</h1>
{% if error %}
<p style="color:red">{{ error }}</p>
{% endif %}
<form method="post">
<label for="book_url">Enter Book URL:</label><br><br>
<input type="text" id="book_url" name="book_url" style="width:400px">
<br><br>
<button type="submit">Scrape</button>
</form>
</body>
</html>

@ -0,0 +1,18 @@
<!DOCTYPE html>
<html>
<head>
<title>Scrape Done</title>
</head>
<body>
<h1>Scrape Complete</h1>
<p><strong>Book title:</strong> {{ title }}</p>
<p><strong>Output folder:</strong></p>
<pre>{{ basepath }}</pre>
<a href="/">Scrape another book</a>
</body>
</html>

@ -0,0 +1,79 @@
# ---------- BASIC HTML CLEANUP ----------
&nbsp;=
\xa0=
\u3000=
\r=
\t=
\ufeff=
<br>=
<br />=
<br/>=
# dubbele spaties weg
=
# ---------- WEBSITE NOISE ----------
飘天文学=
飘天文学网=
小说阅读网=
阅读更多小说最新章节请返回飘天文学网首页=
返回飘天文学网首页=
永久地址www.piaotia.com=
www.piaotia.com=
piaotia.com=
piaotian.com=
www.piaotian.com=
www.piaotian.net=
# ---------- NAVIGATION ----------
上一章=
下一章=
返回目录=
返回书页=
上一页=
下一页=
章节目录=
加入书架=
加入书签=
推荐本书=
我的书架=
(快捷键 ←)=
(快捷键 →)=
快捷键=
←=
→=
# ---------- COPYRIGHT / DISCLAIMER ----------
重要声明=
版权=
All rights reserved=
Copyright=
Copyright ©=
版权所有=
本小说来自互联网资源,如果侵犯您的权益请联系我们=
本站立场无关=
均由网友发表或上传=
# ---------- COMMON NOISE ----------
广告=
广告位=
手机阅读请访问=
章节错误请点击举报=
举报原因如下=
章节错误=
报错=
错误章节=
# ---------- ASCII CLEANUP ----------
“="
”="
='
='
—=-
=-
…=...
# ---------- KNOWN GARBAGE STRINGS ----------
=
=

@ -0,0 +1,53 @@
import requests
from requests.auth import HTTPBasicAuth
import os
import argparse
# Input parameters
parser = argparse.ArgumentParser(
    description='Genereer TOC uit Azure DevOps Wiki-structuur')
parser.add_argument('--max-depth', type=int, default=3,
                    help='Maximale diepte van de TOC (standaard: 3)')
args = parser.parse_args()
# Configuration
organization = "hhdelfland"
project = "Delfland.EAM_OBS_beheer"
wiki = "Delfland.EAM_OBS_beheer.wiki"
# SECURITY: a personal access token used to be hard-coded here as a fallback.
# Credentials must not live in source control, so the token is now read from
# the AZURE_PAT environment variable only. The token that was committed
# should be treated as compromised and revoked.
pat = os.getenv("AZURE_PAT")
if not pat:
    raise SystemExit(
        "Set the AZURE_PAT environment variable to an Azure DevOps personal access token.")
url = f"https://dev.azure.com/{organization}/{project}/_apis/wiki/wikis/{wiki}/pages?api-version=7.1-preview.1&recursionLevel=full"
auth = HTTPBasicAuth('', pat)
# API call; the timeout keeps the script from hanging on an unresponsive server
response = requests.get(url, auth=auth, timeout=30)
# Build the TOC as a nested Markdown list, limited to max_depth levels
def generate_toc(pages, depth=1, max_depth=3):
    """Return Markdown list lines for ``pages`` and their sub pages.

    Args:
        pages: Iterable of page dicts with a ``"path"`` key and optional
            ``"order"`` and ``"subPages"`` keys.
        depth: Level of ``pages``; determines the indentation.
        max_depth: Levels deeper than this are left out.

    Returns:
        The list as one string ("" when ``depth`` exceeds ``max_depth``).
    """
    if depth > max_depth:
        return ""
    lines = []
    for page in sorted(pages, key=lambda p: p.get("order", 0)):
        path = page["path"]
        # Last path segment as the label; hyphens shown as spaces.
        title = path.split("/")[-1].replace("-", " ")
        link = path.strip("/").replace(" ", "%20") + ".md"
        lines.append(f"{' ' * (depth - 1)}- [{title}]({link})\n")
        children = page.get("subPages")
        if children:
            lines.append(generate_toc(children, depth + 1, max_depth))
    return "".join(lines)
# Show the result
if response.status_code == 200:
    data = response.json()
    # The top-level pages are level 1. Starting at depth=5 exceeded the
    # default --max-depth of 3, so generate_toc() always returned "".
    toc = generate_toc(data.get("subPages", []),
                       depth=1, max_depth=args.max_depth)
    print("# Wiki TOC\n")
    print(toc)
else:
    print("Fout bij ophalen wiki-pagina's:", response.status_code)
    print(response.text)

@ -0,0 +1,36 @@
import requests
from requests.auth import HTTPBasicAuth
import os
# Target wiki and credentials
organization = "hhdelfland"
project = "Delfland.EAM_OBS_beheer"
wiki = "Delfland.EAM_OBS_beheer.wiki"
# The token comes from AZURE_PAT; the fallback below is only a placeholder
# that has to be replaced by hand.
pat = os.getenv(
    "AZURE_PAT") or "PLAK JE ACCESS TOKEN HIER TUSSEN DE AANHALINGSTEKENS"
url = f"https://dev.azure.com/{organization}/{project}/_apis/wiki/wikis/{wiki}/pages?api-version=7.1-preview.1&recursionLevel=full"
auth = HTTPBasicAuth('', pat)
# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(url, auth=auth, timeout=30)
def generate_toc(pages, indent=0):
    """Return a Markdown list linking every page that has a ``remoteUrl``.

    Sub pages are listed one indentation step deeper than their parent;
    pages without a ``remoteUrl`` are skipped but their sub pages are kept.
    """
    entries = []
    for page in pages:
        name = page["path"].split("/")[-1]
        link = page.get("remoteUrl")
        if link:
            entries.append(" " * indent + f"- [{name}]({link})\n")
        if "subPages" in page:
            entries.append(generate_toc(page["subPages"], indent + 1))
    return "".join(entries)
# Print the TOC on success, otherwise the HTTP status and response body
if response.status_code != 200:
    print("Fout bij ophalen wiki-pagina's:", response.status_code)
    print(response.text)
else:
    top_level_pages = response.json().get("subPages", [])
    toc = generate_toc(top_level_pages)
    print("# Wiki TOC\n")
    print(toc)

@ -0,0 +1,47 @@
#!/bin/bash
# Scaffold the po-quest Flask project: folders, empty source files,
# requirements.txt and .gitignore.
# Stop at the first error instead of continuing in a half-built tree.
set -euo pipefail
# Project name
PROJECT_NAME="po-quest"
# Create the project folders
echo "📁 Maken van projectstructuur..."
mkdir -p "$PROJECT_NAME"
mkdir -p "$PROJECT_NAME/routes"
mkdir -p "$PROJECT_NAME/templates"
mkdir -p "$PROJECT_NAME/static/css"
mkdir -p "$PROJECT_NAME/static/js"
# Files in the project root
touch "$PROJECT_NAME/app.py"
touch "$PROJECT_NAME/config.py"
touch "$PROJECT_NAME/extensions.py"
touch "$PROJECT_NAME/models.py"
touch "$PROJECT_NAME/forms.py"
touch "$PROJECT_NAME/database.py"
touch "$PROJECT_NAME/requirements.txt"
touch "$PROJECT_NAME/.gitignore"
# Files in the routes folder
touch "$PROJECT_NAME/routes/__init__.py"
touch "$PROJECT_NAME/routes/main.py"
touch "$PROJECT_NAME/routes/admin.py"
# HTML templates
touch "$PROJECT_NAME/templates/base.html"
touch "$PROJECT_NAME/templates/index.html"
touch "$PROJECT_NAME/templates/admin.html"
touch "$PROJECT_NAME/templates/edit_question.html"
touch "$PROJECT_NAME/templates/edit_choice.html"
# Templates rendered by routes/admin.py that were not created before
touch "$PROJECT_NAME/templates/admin_dashboard.html"
touch "$PROJECT_NAME/templates/add_question.html"
touch "$PROJECT_NAME/templates/add_choice.html"
# Write the required packages to requirements.txt
printf '%s\n' flask flask_sqlalchemy flask_wtf wtforms > "$PROJECT_NAME/requirements.txt"
# Write the .gitignore file
printf '%s\n' "__pycache__/" "*.sqlite3" "*.db" > "$PROJECT_NAME/.gitignore"
echo "✅ Setup voltooid! Je kunt starten met ontwikkelen in de map '$PROJECT_NAME'."

@ -0,0 +1,3 @@
__pycache__/
*.sqlite3
*.db

@ -0,0 +1,17 @@
import os

from flask import Flask
from extensions import db
from routes.admin import admin_bp
from routes.main import main_bp

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///po_quest.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# Prefer a secret from the environment; the literal is only a development
# fallback and must not be used in production.
app.secret_key = os.getenv('SECRET_KEY', 'supersecretkey')
# Initialise the database
db.init_app(app)
# Register the blueprints. The base template links to both 'main.index' and
# 'admin.admin_dashboard', so both must exist; because each blueprint maps
# "/", the admin pages are mounted under /admin.
app.register_blueprint(main_bp)
app.register_blueprint(admin_bp, url_prefix='/admin')
if __name__ == "__main__":
    app.run(debug=True)

@ -0,0 +1,7 @@
import os
class Config:
    """Flask settings: secret key (from the environment when set) and the
    SQLAlchemy database location."""

    # Falls back to a fixed development key when SECRET_KEY is not set.
    SECRET_KEY = os.environ.get("SECRET_KEY", "supersecretkey")

    SQLALCHEMY_TRACK_MODIFICATIONS = False
    SQLALCHEMY_DATABASE_URI = "sqlite:///database.db"

@ -0,0 +1,6 @@
from app import app
from extensions import db
# db.create_all() needs an active application context to find the database.
with app.app_context():
    db.create_all()
# Confirmation once the tables have been created.
print("✅ Database is aangemaakt!")

@ -0,0 +1,3 @@
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy()

@ -0,0 +1,14 @@
# forms.py
from flask_wtf import FlaskForm
from wtforms import StringField, SelectField
from wtforms.validators import DataRequired
from models import Question # Zorg ervoor dat Question correct geïmporteerd wordt
class QuestionForm(FlaskForm):
    """Form for creating or editing a question; the text is required."""

    text = StringField(
        'Vraag',
        validators=[DataRequired()],
    )
class ChoiceForm(FlaskForm):
    """Form for a choice: required text plus a select for the next question.

    The select declares no choices here; they have to be assigned to
    ``next_question.choices`` by the code that uses the form.
    """

    text = StringField(
        'Keuze',
        validators=[DataRequired()],
    )
    # Submitted values are converted to int.
    next_question = SelectField(
        'Volgende vraag',
        coerce=int,
    )

@ -0,0 +1,20 @@
from extensions import db
class Question(db.Model):
    """A question of the questionnaire together with its answer choices."""

    id = db.Column(db.Integer, primary_key=True)
    text = db.Column(db.String(255), nullable=False)
    # Choice has two foreign keys to this table (question_id and
    # next_question_id). Without naming the column, SQLAlchemy cannot decide
    # which one joins "choices" and raises AmbiguousForeignKeysError when the
    # mappers are configured.
    choices = db.relationship(
        'Choice', backref='question', lazy=True,
        foreign_keys='Choice.question_id')
class Choice(db.Model):
    """An answer option of a question, optionally leading to a next question."""

    id = db.Column(db.Integer, primary_key=True)
    text = db.Column(db.String(255), nullable=False)
    # Owning question (required).
    question_id = db.Column(
        db.Integer,
        db.ForeignKey('question.id'),
        nullable=False,
    )
    # Column for the follow-up question (optional).
    next_question_id = db.Column(
        db.Integer,
        db.ForeignKey('question.id'),
        nullable=True,
    )
    # Follow-up question object; the target question gets a
    # ``previous_choices`` collection through the backref.
    next_question = db.relationship(
        'Question',
        foreign_keys=[next_question_id],
        backref='previous_choices',
    )

@ -0,0 +1,4 @@
flask
flask_sqlalchemy
flask_wtf
wtforms

@ -0,0 +1,94 @@
from flask import Blueprint, render_template, request, redirect, url_for
from flask_sqlalchemy import SQLAlchemy
from models import db, Question, Choice
from forms import ChoiceForm, QuestionForm
# Maak de blueprint aan
admin_bp = Blueprint('admin', __name__)
# Route for the admin dashboard
@admin_bp.route('/')
def admin_dashboard():
    """Render the dashboard with every question from the database."""
    return render_template(
        'admin_dashboard.html',
        questions=Question.query.all(),
    )
# Route for adding a question
@admin_bp.route('/add_question', methods=['GET', 'POST'])
def add_question():
    """Show the question form; on a valid submit store it and go back to
    the dashboard."""
    form = QuestionForm()
    if not form.validate_on_submit():
        # First visit or invalid input: (re)display the form.
        return render_template('add_question.html', form=form)
    db.session.add(Question(text=form.text.data))
    db.session.commit()
    return redirect(url_for('admin.admin_dashboard'))
# Route for editing a question
@admin_bp.route('/edit_question/<int:question_id>', methods=['GET', 'POST'])
def edit_question(question_id):
    """Edit the text of an existing question (404 when it does not exist)."""
    question = Question.query.get_or_404(question_id)
    form = QuestionForm(obj=question)
    if not form.validate_on_submit():
        # First visit or invalid input: (re)display the form.
        return render_template(
            'edit_question.html', form=form, question=question)
    question.text = form.text.data
    db.session.commit()
    return redirect(url_for('admin.admin_dashboard'))
# Route for deleting a question
@admin_bp.route('/delete_question/<int:question_id>', methods=['POST'])
def delete_question(question_id):
    """Delete a question together with its choices (404 when missing)."""
    question = Question.query.get_or_404(question_id)
    # Choice.question_id is NOT NULL and the relationship declares no delete
    # cascade, so deleting only the question would make SQLAlchemy try to set
    # that column to NULL and fail. Remove the choices explicitly first.
    for choice in list(question.choices):
        db.session.delete(choice)
    db.session.delete(question)
    db.session.commit()
    return redirect(url_for('admin.admin_dashboard'))
# Route for adding a choice to a question
@admin_bp.route('/add_choice/<int:question_id>', methods=['GET', 'POST'])
def add_choice(question_id):
    """Add a choice to a question, optionally pointing to a next question."""
    form = ChoiceForm()
    question = Question.query.get_or_404(question_id)
    # A SelectField cannot validate without choices. Offer every question
    # plus an explicit "none" entry (0), which is stored as NULL below.
    form.next_question.choices = [(0, '(geen)')] + [
        (q.id, q.text) for q in Question.query.order_by(Question.id).all()]
    if form.next_question.data is None:
        # Nothing selected or field absent from the submitted form: "none".
        form.next_question.data = 0
    if form.validate_on_submit():
        new_choice = Choice(
            text=form.text.data,
            question_id=question.id,
            next_question_id=form.next_question.data or None)
        db.session.add(new_choice)
        db.session.commit()
        return redirect(url_for('admin.edit_question', question_id=question.id))
    return render_template('add_choice.html', form=form, question=question)
# Route for editing a choice
@admin_bp.route('/edit_choice/<int:choice_id>', methods=['GET', 'POST'])
def edit_choice(choice_id):
    """Edit the text and the next question of an existing choice."""
    choice = Choice.query.get_or_404(choice_id)
    form = ChoiceForm(obj=choice)
    # A SelectField cannot validate without choices. Offer every question
    # plus an explicit "none" entry (0), which is stored as NULL below.
    form.next_question.choices = [(0, '(geen)')] + [
        (q.id, q.text) for q in Question.query.order_by(Question.id).all()]
    if form.next_question.data is None:
        # obj=choice supplies the related Question object, which does not
        # coerce to int; fall back to the stored id. This also keeps the
        # current value when the submitted form does not contain the field.
        form.next_question.data = choice.next_question_id or 0
    if form.validate_on_submit():
        choice.text = form.text.data
        choice.next_question_id = form.next_question.data or None
        db.session.commit()
        return redirect(url_for('admin.edit_question', question_id=choice.question_id))
    return render_template('edit_choice.html', form=form, choice=choice)
# Route for deleting a choice
@admin_bp.route('/delete_choice/<int:choice_id>', methods=['POST'])
def delete_choice(choice_id):
    """Delete a choice and return to the edit page of its question."""
    choice = Choice.query.get_or_404(choice_id)
    # Read the parent id first: after delete + commit the instance is expired
    # and its row is gone, so attribute access would raise.
    parent_question_id = choice.question_id
    db.session.delete(choice)
    db.session.commit()
    return redirect(url_for('admin.edit_question', question_id=parent_question_id))

@ -0,0 +1,10 @@
from flask import Blueprint, render_template
from models import Question
main_bp = Blueprint("main", __name__)
@main_bp.route("/")
def index():
    """Render the start page with the first question returned by the query
    (``None`` when there are no questions)."""
    return render_template("index.html", question=Question.query.first())

@ -0,0 +1,27 @@
{% extends "base.html" %}
{% block content %}
<h2>Beheer Vragenlijst</h2>
<a href="{{ url_for('admin.add_question') }}">Nieuwe Vraag Toevoegen</a>
<ul>
{% for question in questions %}
<li>
<strong>{{ question.text }}</strong>
<a href="{{ url_for('admin.edit_question', question_id=question.id) }}">Bewerken</a>
<a href="{{ url_for('admin.add_choice', question_id=question.id) }}">Keuze toevoegen</a>
<ul>
{% for choice in question.choices %}
<li>
<a href="{{ url_for('admin.edit_choice', choice_id=choice.id) }}">
{{ choice.text }}
</a>
{% if choice.next_question %}
- Volgende vraag: <a href="{{ url_for('admin.edit_question', question_id=choice.next_question.id) }}">{{ choice.next_question.text }}</a>
{% endif %}
</li>
{% endfor %}
</ul>
</li>
{% endfor %}
</ul>
{% endblock %}

@ -0,0 +1,26 @@
<!DOCTYPE html>
<html lang="nl">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Vragenlijst</title>
<link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
</head>
<body>
<header>
<h1>Vragenlijst App</h1>
<nav>
<a href="{{ url_for('main.index') }}">Start</a>
<a href="{{ url_for('admin.admin_dashboard') }}">Beheer</a>
</nav>
</header>
<main>
{% block content %}{% endblock %}
</main>
<footer>
<p>&copy; 2025 Mijn App</p>
</footer>
</body>
</html>

@ -0,0 +1,13 @@
{% extends "base.html" %}
{% block content %}
<h2>Keuze Bewerken</h2>
<form method="POST">
{{ form.hidden_tag() }}
<div>
<label for="text">Keuze:</label>
{{ form.text() }}
</div>
<button type="submit">Opslaan</button>
</form>
{% endblock %}

@ -0,0 +1,13 @@
{% extends "base.html" %}
{% block content %}
<h2>Vraag Bewerken</h2>
<form method="POST">
{{ form.hidden_tag() }}
<div>
<label for="text">Vraag:</label>
{{ form.text() }}
</div>
<button type="submit">Opslaan</button>
</form>
{% endblock %}

@ -0,0 +1,13 @@
{% extends "base.html" %}
{% block content %}
<h2>{{ question.text }}</h2>
<form method="POST">
{% for choice in question.choices %}
<label>
<input type="radio" name="choice" value="{{ choice.id }}"> {{ choice.text }}
</label><br>
{% endfor %}
<button type="submit">Volgende</button>
</form>
{% endblock %}

@ -0,0 +1,10 @@
# Image for the webhook tracer, based on the slim Python 3.11 image.
FROM python:3.11-slim
# All following relative paths resolve inside /app.
WORKDIR /app
# Copy only the dependency list before installing, so the install layer can
# be reused from the build cache as long as requirements.txt is unchanged.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Application package.
COPY app/ app/
# Serve the `app` object of app/main.py on port 8000 on all interfaces.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

@ -0,0 +1,8 @@
import logging
def setup_logging():
    """Configure root logging at INFO level with a timestamped format."""
    options = {
        "level": logging.INFO,
        "format": "%(asctime)s [%(levelname)s] %(message)s",
    }
    logging.basicConfig(**options)

@ -0,0 +1,7 @@
from fastapi import FastAPI
from .routes import router
from .logger import setup_logging
# Configure logging, then build the application and attach the routes.
setup_logging()
app = FastAPI()
app.include_router(router)

@ -0,0 +1,85 @@
import logging
import os

from dotenv import load_dotenv
from fastapi import (
    APIRouter,
    Depends,
    Form,
    HTTPException,
    Request,
    WebSocket,
    WebSocketDisconnect,
)
from fastapi.responses import HTMLResponse
from fastapi.security import APIKeyHeader
from fastapi.templating import Jinja2Templates

from .storage import store_message, get_all_messages
# Load a .env file (when present) before the settings below are read.
load_dotenv()
API_KEY = os.environ.get("API_KEY")
# Extracts the "x-api-key" request header; auto_error=False means a missing
# header does not produce an automatic error response.
api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)
templates = Jinja2Templates(directory="app/templates")
router = APIRouter()
# Live WebSocket connections
active_connections = list()
async def validate_api_key(api_key: str = None):
    """Reject the request unless ``api_key`` equals the configured API_KEY.

    Fails closed: when API_KEY is not configured, every request is rejected
    (previously a missing key compared equal to an unset API_KEY).

    Raises:
        HTTPException: 401 when the key is missing, wrong or not configured.
    """
    import hmac  # local import; compare_digest avoids timing side channels

    authorized = (
        bool(API_KEY)
        and isinstance(api_key, str)
        and hmac.compare_digest(api_key.encode(), API_KEY.encode())
    )
    if not authorized:
        # HTTPException produces a 401 response instead of a 500 error.
        raise HTTPException(status_code=401, detail="Unauthorized")
async def broadcast(msg: str):
    """Send ``msg`` to every open WebSocket connection (best effort).

    A connection that fails to receive the message is removed from
    ``active_connections``; the others still get the message.
    """
    stale = []
    # Iterate over a copy: the list can change while awaiting a send.
    for connection in list(active_connections):
        try:
            await connection.send_text(msg)
        except Exception:
            # `except Exception` (not a bare except) lets task cancellation
            # propagate while still tolerating a broken client.
            stale.append(connection)
    for connection in stale:
        if connection in active_connections:
            active_connections.remove(connection)
@router.api_route("/webhook", methods=["GET", "POST"])
async def webhook(request: Request, api_key: str = Depends(api_key_header)):
    """Receive a webhook call, record it and push it to the live trace.

    The "x-api-key" header is resolved through ``Depends``; using the
    ``APIKeyHeader`` object itself as the default never read the header.

    Returns:
        ``{"status": "ok"}`` after the entry was stored and broadcast.
    """
    await validate_api_key(api_key)
    body = await request.body()
    headers = dict(request.headers)
    # The entry is logged, stored and broadcast to the trace page, so the
    # secret that protects this endpoint must not be part of it.
    if "x-api-key" in headers:
        headers["x-api-key"] = "***"
    log_entry = {
        "method": request.method,
        "headers": headers,
        # A payload that is not valid UTF-8 must not fail the request.
        "body": body.decode(errors="replace")
    }
    logging.info("Webhook received: %s", log_entry)
    store_message(log_entry)
    await broadcast(str(log_entry))  # send live update over the websocket
    return {"status": "ok"}
@router.get("/trace", response_class=HTMLResponse)
async def trace_page(request: Request):
    """Render the trace page with all messages stored so far."""
    context = {
        "request": request,
        "messages": get_all_messages(),
    }
    return templates.TemplateResponse("trace.html", context)
@router.websocket("/ws/trace")
async def websocket_endpoint(websocket: WebSocket):
    """Register a trace-page WebSocket until the client goes away."""
    await websocket.accept()
    active_connections.append(websocket)
    try:
        while True:
            # Incoming text is ignored; receiving keeps the connection open
            # and detects the disconnect.
            await websocket.receive_text()
    except WebSocketDisconnect:
        pass
    finally:
        # Unregister on every exit path, not only on a clean disconnect.
        if websocket in active_connections:
            active_connections.remove(websocket)
@router.get("/send", response_class=HTMLResponse)
async def send_form(request: Request):
    """Render the form for sending a test webhook message."""
    context = {"request": request}
    return templates.TemplateResponse("send.html", context)
@router.post("/send")
async def send_post(request: Request, url: str = Form(...), body: str = Form(...), key: str = Form(...)):
    """POST ``body`` to ``url`` with ``key`` as the x-api-key header.

    Returns:
        The status code and text of the remote response, or
        ``{"error": ...}`` when the request could not be made.
    """
    import httpx
    headers = {"x-api-key": key}
    try:
        # The async client awaits the response; the synchronous httpx.post()
        # blocked the event loop for the whole duration of the request.
        async with httpx.AsyncClient() as client:
            response = await client.post(url, content=body, headers=headers)
    except Exception as e:
        # Boundary of a test tool: report the failure instead of a 500.
        return {"error": str(e)}
    return {
        "status_code": response.status_code,
        "response": response.text
    }

@ -0,0 +1,9 @@
# Received messages, kept in memory in arrival order.
messages = []


def store_message(msg: dict):
    """Append one message to the in-memory store."""
    messages.append(msg)


def get_all_messages():
    """Return a new list holding all stored messages."""
    return messages[:]

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html><body>
<h2>Webhookbericht versturen</h2>
<form method="post">
<label>Webhook URL: <input name="url" type="text" required></label><br>
<label>Body: <textarea name="body" rows="5" cols="50"></textarea></label><br>
<label>API Key: <input name="key" type="text"></label><br>
<button type="submit">Versturen</button>
</form>
</body></html>

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
<title>Live Webhook Trace</title>
<style>
pre { background: #eee; padding: 8px; border-radius: 5px; }
</style>
</head>
<body>
<h2>Ontvangen Webhookberichten (live)</h2>
<ul id="messages">
{% for msg in messages %}
<li><pre>{{ msg }}</pre></li>
{% endfor %}
</ul>
<script>
// Use wss:// when the page was loaded over https: browsers refuse plain
// ws:// connections from a secure page.
const wsScheme = location.protocol === "https:" ? "wss://" : "ws://";
const ws = new WebSocket(wsScheme + location.host + "/ws/trace");
ws.onmessage = function(event) {
  // Show the newest message on top as <li><pre>…</pre></li>.
  const msg = document.createElement("li");
  const pre = document.createElement("pre");
  // innerText inserts the payload as text, not as HTML.
  pre.innerText = event.data;
  msg.appendChild(pre);
  document.getElementById("messages").prepend(msg);
};
</script>
</body>
</html>

@ -0,0 +1,19 @@
# docker compose -f docker-compose.yml up -d --build
version: "3.3"
services:
  webhooktest:
    restart: always
    container_name: webhooktest
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Europe/Amsterdam
    ports:
      # container port 8000 is published on host port 5905
      - target: 8000
        published: 5905
        protocol: tcp
    build:
      # Explicit build context; older Compose releases reject a build
      # section that only names a dockerfile.
      context: .
      dockerfile: ./Dockerfile
    env_file:
      - .env

@ -0,0 +1,19 @@
# Create the package directory together with its templates folder
mkdir -p app/templates
# Empty files in the project root
touch .env Dockerfile requirements.txt README.md
# Full paths replace the former `cd app` / `cd templates`: a failing cd no
# longer creates files in the wrong directory, and the caller's working
# directory is left unchanged.
# Python files of the app package
touch app/__init__.py app/main.py app/logger.py app/routes.py app/storage.py
# HTML templates
touch app/templates/trace.html app/templates/send.html

@ -0,0 +1,5 @@
fastapi
uvicorn
python-dotenv
jinja2
python-multipart
# imported by the /send route
httpx

@ -0,0 +1,97 @@
import os
import mysql.connector
from dotenv import load_dotenv
from datetime import datetime
load_dotenv(dotenv_path="../.env")
# Connection settings for mysql.connector.connect(), read from the environment.
db_config = {
    "host": os.getenv("MYSQL_HOST"),
    # int(None) raised TypeError at import time when MYSQL_PORT was unset or
    # empty; fall back to the standard MySQL port.
    "port": int(os.getenv("MYSQL_PORT") or "3306"),
    "user": os.getenv("MYSQL_USER"),
    "password": os.getenv("MYSQL_PASSWORD"),
    "database": os.getenv("MYSQL_DATABASE"),
}
def parse_datetime_safe(dt_string):
    """Parse an ISO date-time string, returning ``None`` when that fails.

    A "Z" or "+00:00" marker is removed before parsing.
    """
    try:
        cleaned = dt_string
        for marker in ("Z", "+00:00"):
            cleaned = cleaned.replace(marker, "")
        parsed = datetime.fromisoformat(cleaned)
    except Exception:
        parsed = None
    return parsed
def parse_float_safe(value):
    """Convert ``value`` to float, returning ``None`` when that fails."""
    try:
        number = float(value)
    except Exception:
        number = None
    return number
def init_db():
    """Create the raw data table with a unique forecast_time, if missing.

    The connection and cursor are closed even when the statement fails.
    """
    conn = mysql.connector.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS solar_raw_data (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    forecast_time DATETIME NOT NULL UNIQUE,
                    valid_to DATETIME,
                    capacity FLOAT,
                    volume FLOAT,
                    percentage FLOAT,
                    emission FLOAT,
                    emission_factor FLOAT,
                    last_update DATETIME
                )
            """)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
def insert_forecast_records(records):
    """Insert the forecast records whose forecast_time is not stored yet.

    Values are parsed leniently (unparseable ones become NULL); records
    without a usable ``validfrom`` are skipped. A record that fails to insert
    is reported and skipped. The connection and cursor are always closed.

    Args:
        records: Iterable of dicts with the keys ``validfrom``, ``validto``,
            ``capacity``, ``volume``, ``percentage``, ``emission``,
            ``emissionfactor`` and ``lastupdate``.
    """
    insert_sql = """
        INSERT INTO solar_raw_data (
            forecast_time,
            valid_to,
            capacity,
            volume,
            percentage,
            emission,
            emission_factor,
            last_update
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """
    conn = mysql.connector.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            # Fetch the timestamps that are already known
            cursor.execute("SELECT forecast_time FROM solar_raw_data")
            existing_timestamps = {row[0] for row in cursor.fetchall()}
            new_rows = 0
            for record in records:
                forecast_time = parse_datetime_safe(record.get("validfrom", ""))
                if not forecast_time or forecast_time in existing_timestamps:
                    continue
                try:
                    cursor.execute(insert_sql, (
                        forecast_time,
                        parse_datetime_safe(record.get("validto")),
                        parse_float_safe(record.get("capacity")),
                        parse_float_safe(record.get("volume")),
                        parse_float_safe(record.get("percentage")),
                        parse_float_safe(record.get("emission")),
                        parse_float_safe(record.get("emissionfactor")),
                        parse_datetime_safe(record.get("lastupdate"))
                    ))
                except Exception as e:
                    print(f"❌ Fout bij record: {e}\nRecord: {record}")
                    continue
                # Remember the timestamp so a duplicate later in the same
                # batch is skipped instead of violating the UNIQUE constraint.
                existing_timestamps.add(forecast_time)
                new_rows += 1
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    print(f"{new_rows} nieuwe records opgeslagen.")

@ -0,0 +1,52 @@
import os
import requests
from dotenv import load_dotenv
from datetime import datetime, timedelta
from db import init_db, insert_forecast_records
load_dotenv(dotenv_path="../.env")
NED_API_KEY = os.getenv("NED_API_KEY")
def fetch_forecast():
    """Fetch today's (UTC) utilization records from the NED API.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: when the API answers with an error status.
        requests.Timeout: when the API does not answer within 30 seconds.
    """
    # Local import: datetime.utcnow() is deprecated since Python 3.12.
    from datetime import timezone

    url = "https://api.ned.nl/v1/utilizations"
    headers = {
        "X-AUTH-TOKEN": NED_API_KEY,
        "Accept": "application/ld+json"
    }
    today = datetime.now(timezone.utc).date()
    tomorrow = today + timedelta(days=1)
    params = {
        "point": 9,
        "type": 2,
        "granularity": 3,
        "granularitytimezone": 1,
        "classification": 2,
        "activity": 1,
        "validfrom[after]": today.strftime("%Y-%m-%d"),
        "validfrom[strictly_before]": tomorrow.strftime("%Y-%m-%d")
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
def main():
    """Fetch the forecast, print it, then store it in the database."""
    print("📡 Ophalen zonneproductievoorspelling van NED.nl")
    payload = fetch_forecast()
    # The records are under the "hydra:member" key of the response.
    records = payload.get("hydra:member", [])
    print(records)
    print(f"Gevonden records: {len(records)}")
    # Make sure the table exists before inserting.
    init_db()
    insert_forecast_records(records)
    print("✅ Data opgeslagen in database")


if __name__ == "__main__":
    main()

@ -0,0 +1,3 @@
requests
python-dotenv
mysql-connector-python
Loading…
Cancel
Save