diff --git a/bookscraper/README.md b/bookscraper/README.md index 01b0304..37da684 100644 --- a/bookscraper/README.md +++ b/bookscraper/README.md @@ -130,3 +130,7 @@ docker builder prune -af docker volume prune -f docker compose build --no-cache docker compose up + +docker compose down +docker compose build +docker compose up diff --git a/bookscraper/app.py b/bookscraper/app.py index 2c53ea1..45f2e29 100644 --- a/bookscraper/app.py +++ b/bookscraper/app.py @@ -111,14 +111,6 @@ def progress(book_id): return jsonify(get_progress(book_id)) -# ===================================================== -# LOGS — GLOBAL UI LOGS -# ===================================================== -@app.route("/logs", methods=["GET"]) -def logs(): - return jsonify({"logs": get_ui_logs()}) - - # ===================================================== # CELERY RESULT → return book_id # ===================================================== @@ -215,11 +207,12 @@ def api_book_logs(book_id): # ===================================================== @app.route("/dashboard") def dashboard(): + logs_list = get_ui_logs() or [] # ★ FIX: dashboard moet altijd books + logs krijgen return render_template( "dashboard/dashboard.html", books=list_active_books(), - logs=get_ui_logs(), + logs=logs_list, # dashboard krijgt LIST, geen dict ) @@ -255,6 +248,34 @@ def debug_redis_keys(): return jsonify(results) +# ============================================================ +# Rolling log endpoint (no new file) +# ============================================================ + +from flask import jsonify, request + +# ===================================================== +# ROLLING LOG ENDPOINT — DELTA POLLING VIA ui_log +# ===================================================== +from scraper.ui_log import get_ui_logs_delta + + +@app.route("/logs", methods=["GET"]) +def logs(): + """ + Delta log delivery for WebGUI. + Browser sends ?last_index=N, we return only new lines. + """ + try: + last_index = int(request.args.get("last_index", -1)) + except: + last_index = -1 + + new_lines, total = get_ui_logs_delta(last_index) + + return jsonify({"lines": new_lines, "total": total}) + + # ===================================================== # RUN FLASK # ===================================================== diff --git a/bookscraper/logbus/publisher.py b/bookscraper/logbus/publisher.py index 9a597db..d87695a 100644 --- a/bookscraper/logbus/publisher.py +++ b/bookscraper/logbus/publisher.py @@ -27,3 +27,32 @@ def log(message: str): push_ui(message) except Exception: pass + + +# ============================================================ +# Delta-based log retrieval using Redis indexes +# ============================================================ + + +def get_ui_logs_delta(last_index: int): + """ + Returns (new_lines, total_count) + Only returns log lines AFTER last_index. + + Example: + last_index = 10 → returns logs with Redis indexes 11..end + """ + # Determine total lines in buffer + total = r.llen(UI_LOG_KEY) + + if total == 0: + return [], 0 + + # First load OR index invalid → send entire buffer + if last_index < 0 or last_index >= total: + logs = r.lrange(UI_LOG_KEY, 0, -1) + return logs, total + + # Only new lines: + new_lines = r.lrange(UI_LOG_KEY, last_index + 1, -1) + return new_lines, total diff --git a/bookscraper/scraper/tasks/controller_tasks.py b/bookscraper/scraper/tasks/controller_tasks.py index 0f06405..0c1419d 100644 --- a/bookscraper/scraper/tasks/controller_tasks.py +++ b/bookscraper/scraper/tasks/controller_tasks.py @@ -13,6 +13,9 @@ from scraper.download_controller import DownloadController from scraper.progress import ( set_total, ) +from urllib.parse import urlparse +import redis +import os from scraper.abort import abort_requested print(">>> [IMPORT] controller_tasks.py loaded") @@ -31,8 +34,30 @@ def launch_downloads(self, book_id: str, scrape_result: dict): title = scrape_result.get("title", "UnknownBook") chapters = scrape_result.get("chapters", []) or [] total = len(chapters) + # ------------------------------------------------------------ + # INIT BOOK STATE MODEL (required for Active Books dashboard) + # ------------------------------------------------------------ + + broker_url = os.getenv("REDIS_BROKER", "redis://redis:6379/0") + parsed = urlparse(broker_url) + + state = redis.Redis( + host=parsed.hostname, + port=parsed.port, + db=int(parsed.path.strip("/")), + decode_responses=True, + ) + + # Book metadata + state.set(f"book:{book_id}:title", title) + state.set(f"book:{book_id}:status", "starting") + + # Download counters + state.set(f"book:{book_id}:download:total", total) + state.set(f"book:{book_id}:download:done", 0) - log(f"[CTRL] Book '{title}' → {total} chapters (book_id={book_id})") + # Audio counters (start at zero) + state.set(f"book:{book_id}:audio:done", 0) # ------------------------------------------------------------ # INIT PROGRESS diff --git a/bookscraper/scraper/tasks/parse_tasks.py b/bookscraper/scraper/tasks/parse_tasks.py index 71392da..0b41bc9 100644 --- a/bookscraper/scraper/tasks/parse_tasks.py +++ b/bookscraper/scraper/tasks/parse_tasks.py @@ -26,11 +26,10 @@ def parse_chapter(self, download_result: dict): book_id = download_result.get("book_id", "NOBOOK") chapter_dict = download_result.get("chapter") or {} book_meta = download_result.get("book_meta") or {} - + chapter_title = chapter_dict.get("title") chapter_num = chapter_dict.get("num") chapter_url = chapter_dict.get("url") html = download_result.get("html") - # ------------------------------------------------------------ # SKIPPED DOWNLOAD → SKIP PARSE # ------------------------------------------------------------ @@ -127,9 +126,8 @@ def parse_chapter(self, download_result: dict): else: prev_blank = False cleaned.append(stripped) - text = "\n".join(cleaned) - + text = chapter_title + "\n" + text # ------------------------------------------------------------ # Add header to chapter 1 # ------------------------------------------------------------ diff --git a/bookscraper/scraper/ui_log.py b/bookscraper/scraper/ui_log.py index 312e20e..efe7bc3 100644 --- a/bookscraper/scraper/ui_log.py +++ b/bookscraper/scraper/ui_log.py @@ -44,3 +44,31 @@ def reset_ui_logs(): - Auto-clear when new book scraping starts """ r.delete(UI_LOG_KEY) + + +# ============================================================ +# Delta-based log retrieval using Redis indexes +# ============================================================ + + +def get_ui_logs_delta(last_index: int): + """ + Returns (new_lines, total_count). + Only returns log lines AFTER last_index. + + Example: + last_index = 10 → returns logs with Redis indexes 11..end + """ + total = r.llen(UI_LOG_KEY) + + if total == 0: + return [], 0 + + # First load OR index invalid → send entire buffer + if last_index < 0 or last_index >= total: + logs = r.lrange(UI_LOG_KEY, 0, -1) + return logs, total + + # Only new logs + new_lines = r.lrange(UI_LOG_KEY, last_index + 1, -1) + return new_lines, total diff --git a/bookscraper/static/css/dashboard.css b/bookscraper/static/css/dashboard.css index 1868e87..c3946d1 100644 --- a/bookscraper/static/css/dashboard.css +++ b/bookscraper/static/css/dashboard.css @@ -186,40 +186,41 @@ white-space: pre-wrap; /* wraps text */ word-break: break-word; /* lange links breken */ } - +/* Basestijl voor alle logregels */ .log-line { white-space: pre-wrap; padding: 2px 0; + font-family: "SF Mono", "Consolas", "Courier New", monospace; } -.log-empty { - color: #888; - font-style: italic; -} -.log-line { - color: #00ff66; +/* Subklassen per logtype */ +.log-line.default { + color: #00ff66; /* groen */ } -.log-line:has([DL]), -.log-line:has([DOWNLOAD]) { +.log-line.dl { color: #00ccff; /* cyan */ } -.log-line:has([PARSE]) { - color: #ffaa00; -} /* orange */ -.log-line:has([SAVE]) { - color: #ffdd33; +.log-line.parse { + color: #ffaa00; /* oranje */ +} + +.log-line.save { + color: #ffdd33; /* geel */ } -.log-line:has([AUDIO]) { - color: #ff66ff; -} /* purple */ -.log-line:has([CTRL]) { - color: #66aaff; + +.log-line.audio { + color: #ff66ff; /* paars */ +} + +.log-line.ctrl { + color: #66aaff; /* lichtblauw */ +} + +.log-line.error { + color: #ff3333; /* rood */ } -.log-line:has([ERROR]) { - color: #ff3333; -} /* red for errors */ /* ------------------------------ PLACEHOLDER @@ -239,3 +240,23 @@ font-size: 12px; border-top: 1px solid #ddd; } +.book-abort-area { + margin-top: 10px; + text-align: right; +} + +.abort-btn { + padding: 6px 12px; + border-radius: 4px; + border: 1px solid #cc0000; + background: #ff4444; + color: white; + font-size: 12px; + cursor: pointer; + transition: background 0.2s, border-color 0.2s; +} + +.abort-btn:hover { + background: #ff2222; + border-color: #aa0000; +} diff --git a/bookscraper/static/js/dashboard.js b/bookscraper/static/js/dashboard.js index bad2ac9..8b2ec5a 100644 --- a/bookscraper/static/js/dashboard.js +++ b/bookscraper/static/js/dashboard.js @@ -104,6 +104,41 @@ function selectBook(bookId) { // Immediate refresh refreshBook(ACTIVE_BOOK); } +setInterval(refreshActiveBooks, 2000); +async function refreshActiveBooks() { + const books = await apiGet("/api/books"); + if (!books) return; + + const container = $("#book-list"); + if (!container) return; + + // Herbouw de lijst + container.innerHTML = ""; + books.forEach((b) => { + const div = document.createElement("div"); + div.className = "book-list-item"; + div.dataset.bookId = b.book_id; + + div.innerHTML = ` +
${b.title}
+
${b.status}
+
+ ${b.download_done}/${b.download_total} downloaded, + ${b.audio_done}/${b.audio_total} audio +
+ `; + + // Event listener opnieuw koppelen + div.addEventListener("click", () => selectBook(b.book_id)); + + container.appendChild(div); + }); + + // Als ACTIVE_BOOK nog niet bekend → auto-selecteer eerste boek + if (!ACTIVE_BOOK && books.length > 0) { + selectBook(books[0].book_id); + } +} /* --------------------------------------------------------- Fetch logs + progress from API @@ -148,3 +183,16 @@ function updateLogs(logList) { autoScroll(output); } + +function abortBook(book_id) { + if (!confirm(`Abort tasks for book ${book_id}?`)) return; + + fetch(`/abort/${book_id}`, { method: "POST" }) + .then((r) => r.json()) + .then((data) => { + console.log("Abort:", data); + }) + .catch((err) => { + console.error("Abort failed:", err); + }); +} diff --git a/bookscraper/static/js/log_view.js b/bookscraper/static/js/log_view.js index 85507ef..c51c633 100644 --- a/bookscraper/static/js/log_view.js +++ b/bookscraper/static/js/log_view.js @@ -5,7 +5,8 @@ - filtering - clearing - auto-scroll - - refresh support for dashboard polling + - delta polling (efficient) + - rolling limit (prevent GUI freeze) ======================================================================= */ console.log(">>> log_view.js LOADING…"); @@ -14,6 +15,8 @@ console.log(">>> log_view.js LOADING…"); Log filtering --------------------------------------------------------- */ let LOG_FILTER = "ALL"; +let LAST_LOG_INDEX = -1; // For delta polling +const MAX_LOG_LINES = 2000; // Rolling cap to prevent freezing function applyLogFilter() { console.log(">>> log_view.js applyLogFilter(), filter =", LOG_FILTER); @@ -56,32 +59,74 @@ document.addEventListener("DOMContentLoaded", () => { if (clearBtn) { clearBtn.addEventListener("click", () => { console.log(">>> log_view.js log-clear clicked → clearing output"); - if (output) output.innerHTML = ""; + if (output) { + output.innerHTML = ""; + LAST_LOG_INDEX = -1; // reset delta polling + } }); } }); /* --------------------------------------------------------- - Append a line to the log output + Append + Rolling buffer --------------------------------------------------------- */ function logAppend(lineText) { const output = $("#log-output"); + if (!output) return; - if (!output) { - console.log(">>> log_view.js logAppend() SKIPPED — no #log-output"); - return; + const div = document.createElement("div"); + div.classList.add("log-line"); + + // ----------------------------------------------------- + // Assign subtype classes + // ----------------------------------------------------- + if (lineText.includes("[DL]") || lineText.includes("[DOWNLOAD]")) { + div.classList.add("dl"); + } else if (lineText.includes("[PARSE]")) { + div.classList.add("parse"); + } else if (lineText.includes("[SAVE]")) { + div.classList.add("save"); + } else if (lineText.includes("[AUDIO]")) { + div.classList.add("audio"); + } else if (lineText.includes("[CTRL]")) { + div.classList.add("ctrl"); + } else if (lineText.includes("[ERROR]")) { + div.classList.add("error"); + } else { + div.classList.add("default"); } - console.log(">>> log_view.js logAppend():", lineText); - - const div = document.createElement("div"); - div.className = "log-line"; div.innerText = lineText; - output.appendChild(div); + // Rolling buffer + while (output.children.length > MAX_LOG_LINES) { + output.removeChild(output.firstChild); + } + applyLogFilter(); autoScroll(output); } +/* --------------------------------------------------------- + Delta-based log polling + --------------------------------------------------------- */ +function pollLogs() { + fetch(`/logs?last_index=${LAST_LOG_INDEX}`) + .then((r) => r.json()) + .then((data) => { + const lines = data.lines || []; + if (lines.length > 0) { + lines.forEach((line) => logAppend(line)); + LAST_LOG_INDEX = data.total - 1; + } + }) + .catch((err) => { + console.warn(">>> log_view.js pollLogs() error:", err); + }); +} + +// Poll every 800 ms +setInterval(pollLogs, 800); + console.log(">>> log_view.js LOADED"); diff --git a/bookscraper/templates/components/book_list_item.html b/bookscraper/templates/components/book_list_item.html index 69dcaf2..abfc6d7 100644 --- a/bookscraper/templates/components/book_list_item.html +++ b/bookscraper/templates/components/book_list_item.html @@ -58,4 +58,11 @@ {{ pct_au }}% + {% if book.status in ["running", "active", "processing"] %} +
+ +
+ {% endif %} diff --git a/bookscraper/templates/components/nav.html b/bookscraper/templates/components/nav.html index 782a379..1653bca 100644 --- a/bookscraper/templates/components/nav.html +++ b/bookscraper/templates/components/nav.html @@ -17,7 +17,7 @@
  • - Active Books + Active Books
  • diff --git a/bookscraper/templates/dashboard/dashboard.html b/bookscraper/templates/dashboard/dashboard.html index d13252c..79e2a69 100644 --- a/bookscraper/templates/dashboard/dashboard.html +++ b/bookscraper/templates/dashboard/dashboard.html @@ -29,12 +29,12 @@

    Actieve boeken

    {% if books and books|length > 0 %} -
    +
    {% for book in books %} {% include "components/book_list_item.html" %} {% endfor %}
    {% else %} -
    Geen actieve boeken.
    +
    Geen actieve boeken.
    {% endif %}