diff --git a/bookscraper/README.md b/bookscraper/README.md
index 01b0304..37da684 100644
--- a/bookscraper/README.md
+++ b/bookscraper/README.md
@@ -130,3 +130,7 @@ docker builder prune -af
 docker volume prune -f
 docker compose build --no-cache
 docker compose up
+
+docker compose down
+docker compose build
+docker compose up
diff --git a/bookscraper/app.py b/bookscraper/app.py
index 2c53ea1..45f2e29 100644
--- a/bookscraper/app.py
+++ b/bookscraper/app.py
@@ -111,14 +111,6 @@ def progress(book_id):
     return jsonify(get_progress(book_id))


-# =====================================================
-# LOGS — GLOBAL UI LOGS
-# =====================================================
-@app.route("/logs", methods=["GET"])
-def logs():
-    return jsonify({"logs": get_ui_logs()})
-
-
 # =====================================================
 # CELERY RESULT → return book_id
 # =====================================================
@@ -215,11 +207,12 @@ def api_book_logs(book_id):
 # =====================================================
 @app.route("/dashboard")
 def dashboard():
+    logs_list = get_ui_logs() or []  # ★ FIX: the dashboard must always receive books + logs

     return render_template(
         "dashboard/dashboard.html",
         books=list_active_books(),
-        logs=get_ui_logs(),
+        logs=logs_list,  # dashboard gets a LIST, not a dict
     )


@@ -255,6 +248,34 @@ def debug_redis_keys():
     return jsonify(results)


+# ============================================================
+# Rolling log endpoint (no new file)
+# ============================================================
+
+from flask import jsonify, request
+
+# =====================================================
+# ROLLING LOG ENDPOINT — DELTA POLLING VIA ui_log
+# =====================================================
+from scraper.ui_log import get_ui_logs_delta
+
+
+@app.route("/logs", methods=["GET"])
+def logs():
+    """
+    Delta log delivery for the WebGUI.
+    The browser sends ?last_index=N; we return only the new lines.
+    """
+    try:
+        last_index = int(request.args.get("last_index", -1))
+    except (TypeError, ValueError):
+        last_index = -1
+
+    new_lines, total = get_ui_logs_delta(last_index)
+
+    return jsonify({"lines": new_lines, "total": total})
+
+
 # =====================================================
 # RUN FLASK
 # =====================================================
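
The new `/logs` endpoint above returns `{"lines": [...], "total": N}` for `?last_index=N`. For review context, here is a minimal polling sketch of the client side of that contract. It is an illustration only, not part of this patch: the `localhost:5000` base URL is an assumption, and the 2-second cadence simply mirrors the dashboard's `setInterval` further down.

```python
# Illustration only (not part of the diff): polling the new /logs delta endpoint.
# Assumes the Flask app is reachable at http://localhost:5000 (assumed host/port).
import json
import time
from urllib.request import urlopen

BASE_URL = "http://localhost:5000"  # assumed


def follow_logs():
    last_index = -1  # -1 → the server returns the whole buffer on the first call
    while True:
        with urlopen(f"{BASE_URL}/logs?last_index={last_index}") as resp:
            payload = json.load(resp)
        for line in payload["lines"]:
            print(line)
        if payload["total"]:
            # Remember the index of the last line we now hold; the endpoint
            # only returns lines AFTER this index on the next call.
            last_index = payload["total"] - 1
        time.sleep(2)  # same 2 s cadence the dashboard uses
```

A useful property of the server-side logic: if the buffer is cleared (reset_ui_logs), `total` drops below the client's `last_index`, which triggers a full resend, so a stale client recovers on its own.
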
diff --git a/bookscraper/logbus/publisher.py b/bookscraper/logbus/publisher.py
index 9a597db..d87695a 100644
--- a/bookscraper/logbus/publisher.py
+++ b/bookscraper/logbus/publisher.py
@@ -27,3 +27,32 @@ def log(message: str):
         push_ui(message)
     except Exception:
         pass
+
+
+# ============================================================
+# Delta-based log retrieval using Redis indexes
+# ============================================================
+
+
+def get_ui_logs_delta(last_index: int):
+    """
+    Returns (new_lines, total_count).
+    Only returns log lines AFTER last_index.
+
+    Example:
+        last_index = 10 → returns logs with Redis indexes 11..end
+    """
+    # Determine total lines in buffer
+    total = r.llen(UI_LOG_KEY)
+
+    if total == 0:
+        return [], 0
+
+    # First load OR index invalid → send entire buffer
+    if last_index < 0 or last_index >= total:
+        logs = r.lrange(UI_LOG_KEY, 0, -1)
+        return logs, total
+
+    # Only new lines:
+    new_lines = r.lrange(UI_LOG_KEY, last_index + 1, -1)
+    return new_lines, total
diff --git a/bookscraper/scraper/tasks/controller_tasks.py b/bookscraper/scraper/tasks/controller_tasks.py
index 0f06405..0c1419d 100644
--- a/bookscraper/scraper/tasks/controller_tasks.py
+++ b/bookscraper/scraper/tasks/controller_tasks.py
@@ -13,6 +13,9 @@ from scraper.download_controller import DownloadController
 from scraper.progress import (
     set_total,
 )
+from urllib.parse import urlparse
+import redis
+import os
 from scraper.abort import abort_requested

 print(">>> [IMPORT] controller_tasks.py loaded")
@@ -31,8 +34,30 @@ def launch_downloads(self, book_id: str, scrape_result: dict):
     title = scrape_result.get("title", "UnknownBook")
     chapters = scrape_result.get("chapters", []) or []
     total = len(chapters)
+    # ------------------------------------------------------------
+    # INIT BOOK STATE MODEL (required for Active Books dashboard)
+    # ------------------------------------------------------------
+
+    broker_url = os.getenv("REDIS_BROKER", "redis://redis:6379/0")
+    parsed = urlparse(broker_url)
+
+    state = redis.Redis(
+        host=parsed.hostname,
+        port=parsed.port,
+        db=int(parsed.path.strip("/")),
+        decode_responses=True,
+    )
+
+    # Book metadata
+    state.set(f"book:{book_id}:title", title)
+    state.set(f"book:{book_id}:status", "starting")
+
+    # Download counters
+    state.set(f"book:{book_id}:download:total", total)
+    state.set(f"book:{book_id}:download:done", 0)

-    log(f"[CTRL] Book '{title}' → {total} chapters (book_id={book_id})")
+    # Audio counters (start at zero)
+    state.set(f"book:{book_id}:audio:done", 0)

     # ------------------------------------------------------------
     # INIT PROGRESS
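
The keys written in launch_downloads() above define the book-state model that the Active Books dashboard reads back. For reference, a sketch of the read side under the same REDIS_BROKER convention. This is an assumed illustration only: `list_active_books()` in app.py is not shown in this diff and may be implemented differently.

```python
# Illustration only: reading back the keys that launch_downloads() writes.
# The connection setup mirrors the REDIS_BROKER parsing used above.
import os
import redis
from urllib.parse import urlparse


def read_book_state(book_id: str) -> dict:
    parsed = urlparse(os.getenv("REDIS_BROKER", "redis://redis:6379/0"))
    state = redis.Redis(
        host=parsed.hostname,
        port=parsed.port,
        db=int(parsed.path.strip("/")),
        decode_responses=True,
    )
    fields = ("title", "status", "download:total", "download:done", "audio:done")
    # Values come back as strings (decode_responses=True) or None if missing.
    return {field: state.get(f"book:{book_id}:{field}") for field in fields}
```
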
diff --git a/bookscraper/scraper/tasks/parse_tasks.py b/bookscraper/scraper/tasks/parse_tasks.py
index 71392da..0b41bc9 100644
--- a/bookscraper/scraper/tasks/parse_tasks.py
+++ b/bookscraper/scraper/tasks/parse_tasks.py
@@ -26,11 +26,10 @@ def parse_chapter(self, download_result: dict):
     book_id = download_result.get("book_id", "NOBOOK")
     chapter_dict = download_result.get("chapter") or {}
     book_meta = download_result.get("book_meta") or {}
-
+    chapter_title = chapter_dict.get("title")
     chapter_num = chapter_dict.get("num")
     chapter_url = chapter_dict.get("url")
     html = download_result.get("html")
-
     # ------------------------------------------------------------
     # SKIPPED DOWNLOAD → SKIP PARSE
     # ------------------------------------------------------------
@@ -127,9 +126,8 @@ def parse_chapter(self, download_result: dict):
         else:
             prev_blank = False
             cleaned.append(stripped)
-
     text = "\n".join(cleaned)
-
+    text = chapter_title + "\n" + text
     # ------------------------------------------------------------
     # Add header to chapter 1
     # ------------------------------------------------------------
diff --git a/bookscraper/scraper/ui_log.py b/bookscraper/scraper/ui_log.py
index 312e20e..efe7bc3 100644
--- a/bookscraper/scraper/ui_log.py
+++ b/bookscraper/scraper/ui_log.py
@@ -44,3 +44,31 @@ def reset_ui_logs():
     - Auto-clear when new book scraping starts
     """
     r.delete(UI_LOG_KEY)
+
+
+# ============================================================
+# Delta-based log retrieval using Redis indexes
+# ============================================================
+
+
+def get_ui_logs_delta(last_index: int):
+    """
+    Returns (new_lines, total_count).
+    Only returns log lines AFTER last_index.
+
+    Example:
+        last_index = 10 → returns logs with Redis indexes 11..end
+    """
+    total = r.llen(UI_LOG_KEY)
+
+    if total == 0:
+        return [], 0
+
+    # First load OR index invalid → send entire buffer
+    if last_index < 0 or last_index >= total:
+        logs = r.lrange(UI_LOG_KEY, 0, -1)
+        return logs, total
+
+    # Only new logs
+    new_lines = r.lrange(UI_LOG_KEY, last_index + 1, -1)
+    return new_lines, total
diff --git a/bookscraper/static/css/dashboard.css b/bookscraper/static/css/dashboard.css
index 1868e87..c3946d1 100644
--- a/bookscraper/static/css/dashboard.css
+++ b/bookscraper/static/css/dashboard.css
@@ -186,40 +186,41 @@
   white-space: pre-wrap; /* wraps text */
   word-break: break-word; /* break long links */
 }
-
+/* Base style for all log lines */
 .log-line {
   white-space: pre-wrap;
   padding: 2px 0;
+  font-family: "SF Mono", "Consolas", "Courier New", monospace;
 }
-.log-empty {
-  color: #888;
-  font-style: italic;
-}
-.log-line {
-  color: #00ff66;
+/* Subclasses per log type */
+.log-line.default {
+  color: #00ff66; /* green */
 }

-.log-line:has([DL]),
-.log-line:has([DOWNLOAD]) {
+.log-line.dl {
   color: #00ccff; /* cyan */
 }

-.log-line:has([PARSE]) {
-  color: #ffaa00;
-} /* orange */
-.log-line:has([SAVE]) {
-  color: #ffdd33;
+.log-line.parse {
+  color: #ffaa00; /* orange */
+}
+
+.log-line.save {
+  color: #ffdd33; /* yellow */
 }
-.log-line:has([AUDIO]) {
-  color: #ff66ff;
-} /* purple */
-.log-line:has([CTRL]) {
-  color: #66aaff;
+
+.log-line.audio {
+  color: #ff66ff; /* purple */
+}
+
+.log-line.ctrl {
+  color: #66aaff; /* light blue */
+}
+
+.log-line.error {
+  color: #ff3333; /* red */
 }

-.log-line:has([ERROR]) {
-  color: #ff3333;
-} /* red for errors */

 /* ------------------------------
    PLACEHOLDER
@@ -239,3 +240,23 @@
   font-size: 12px;
   border-top: 1px solid #ddd;
 }
+.book-abort-area {
+  margin-top: 10px;
+  text-align: right;
+}
+
+.abort-btn {
+  padding: 6px 12px;
+  border-radius: 4px;
+  border: 1px solid #cc0000;
+  background: #ff4444;
+  color: white;
+  font-size: 12px;
+  cursor: pointer;
+  transition: background 0.2s, border-color 0.2s;
+}
+
+.abort-btn:hover {
+  background: #ff2222;
+  border-color: #aa0000;
+}
diff --git a/bookscraper/static/js/dashboard.js b/bookscraper/static/js/dashboard.js
index bad2ac9..8b2ec5a 100644
--- a/bookscraper/static/js/dashboard.js
+++ b/bookscraper/static/js/dashboard.js
@@ -104,6 +104,41 @@ function selectBook(bookId) {
   // Immediate refresh
   refreshBook(ACTIVE_BOOK);
 }
+setInterval(refreshActiveBooks, 2000);
+async function refreshActiveBooks() {
+  const books = await apiGet("/api/books");
+  if (!books) return;
+
+  const container = $("#book-list");
+  if (!container) return;
+
+  // Rebuild the list
+  container.innerHTML = "";
+  books.forEach((b) => {
+    const div = document.createElement("div");
+    div.className = "book-list-item";
+    div.dataset.bookId = b.book_id;
+
+    div.innerHTML = `