diff --git a/bookscraper/app.py b/bookscraper/app.py
index 8e62f33..35de802 100644
--- a/bookscraper/app.py
+++ b/bookscraper/app.py
@@ -8,6 +8,9 @@ load_dotenv()
 print(">>> [WEB] Importing celery_app …")
 from celery_app import celery_app
+from db.db import init_db
+
+init_db()  # ensure DB schema exists before Flask starts
 
 from flask import Flask, render_template, request, jsonify
 from scraper.logger import log_debug
 
diff --git a/bookscraper/celery_app.py b/bookscraper/celery_app.py
index 58a034f..adf08bd 100644
--- a/bookscraper/celery_app.py
+++ b/bookscraper/celery_app.py
@@ -5,6 +5,9 @@ from dotenv import load_dotenv
 print(">>> [celery_app] Loading .env BEFORE initializing Celery...")
 load_dotenv()
+from db.db import init_db
+
+init_db()  # ensures DB exists for all workers
 
 BROKER = os.getenv("REDIS_BROKER")
 BACKEND = os.getenv("REDIS_BACKEND")
 
diff --git a/bookscraper/db/__init__.py b/bookscraper/db/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bookscraper/db/db.py b/bookscraper/db/db.py
new file mode 100644
index 0000000..c944e3a
--- /dev/null
+++ b/bookscraper/db/db.py
@@ -0,0 +1,119 @@
+# ============================================================
+# File: db/db.py
+# Purpose:
+#   Raw SQLite engine for BookScraper.
+#   Provides ONLY low-level DB primitives.
+#       - Connection management (WAL mode)
+#       - init_db() schema creation
+#       - upsert_book() atomic write
+#       - raw fetch helpers (private)
+#
+#   All business logic belongs in repository.py.
+# ============================================================
+
+import os
+import sqlite3
+from threading import Lock
+
+DB_PATH = os.environ.get("BOOKSCRAPER_DB", "/app/data/books.db")
+
+# Ensure directory exists
+os.makedirs(os.path.dirname(DB_PATH) or ".", exist_ok=True)
+
+# Per-process connection cache
+_connection_cache = {}
+_connection_lock = Lock()
+
+
+# ------------------------------------------------------------
+# Connection handling
+# ------------------------------------------------------------
+def get_db():
+    pid = os.getpid()
+
+    with _connection_lock:
+        if pid not in _connection_cache:
+            conn = sqlite3.connect(DB_PATH, check_same_thread=False)
+            conn.row_factory = sqlite3.Row
+            enable_wal_mode(conn)
+            _connection_cache[pid] = conn
+
+    return _connection_cache[pid]
+
+
+def enable_wal_mode(conn):
+    conn.execute("PRAGMA journal_mode=WAL;")
+    conn.execute("PRAGMA synchronous=NORMAL;")
+    conn.commit()
+
+
+# ------------------------------------------------------------
+# Schema creation
+# ------------------------------------------------------------
+def init_db():
+    conn = get_db()
+    conn.execute(
+        """
+        CREATE TABLE IF NOT EXISTS books (
+            book_id TEXT PRIMARY KEY,
+            title TEXT,
+            author TEXT,
+
+            cover_url TEXT,
+            cover_path TEXT,
+
+            chapters_total INTEGER,
+
+            status TEXT,
+            downloaded INTEGER DEFAULT 0,
+            parsed INTEGER DEFAULT 0,
+            audio_done INTEGER DEFAULT 0,
+
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+            last_update DATETIME
+        );
+        """
+    )
+    conn.commit()
+
+
+# ------------------------------------------------------------
+# WRITE OPERATIONS
+# ------------------------------------------------------------
+def upsert_book(book_id, **fields):
+    """
+    Raw upsert primitive. Repository layer should call this.
+    """
+    conn = get_db()
+
+    keys = ["book_id"] + list(fields.keys())
+    values = [book_id] + list(fields.values())
+    placeholders = ",".join(["?"] * len(values))
+
+    updates = ", ".join([f"{k} = excluded.{k}" for k in fields.keys()])
+
+    sql = f"""
+        INSERT INTO books ({','.join(keys)})
+        VALUES ({placeholders})
+        ON CONFLICT(book_id)
+        DO UPDATE SET {updates},
+                      last_update = CURRENT_TIMESTAMP;
+    """
+
+    conn.execute(sql, values)
+    conn.commit()
+
+
+# ------------------------------------------------------------
+# RAW READ OPERATIONS (PRIVATE)
+# ------------------------------------------------------------
+def _raw_get_book(book_id):
+    conn = get_db()
+    row = conn.execute("SELECT * FROM books WHERE book_id = ?;", (book_id,)).fetchone()
+    return dict(row) if row else None
+
+
+def _raw_get_all_books():
+    conn = get_db()
+    cur = conn.execute("SELECT * FROM books ORDER BY created_at DESC;")
+    return [dict(row) for row in cur.fetchall()]
diff --git a/bookscraper/db/repository.py b/bookscraper/db/repository.py
new file mode 100644
index 0000000..047511e
--- /dev/null
+++ b/bookscraper/db/repository.py
@@ -0,0 +1,97 @@
+# ============================================================
+# File: db/repository.py
+# Purpose:
+#   High-level BookScraper database interface.
+#   This is the ONLY module Celery tasks and Flask should use.
+#
+#   Uses low-level primitives from db.db, but exposes
+#   domain-level operations:
+#       - fetch_book / fetch_all_books
+#       - create_or_update_book
+#       - set_status
+#       - incrementing counters
+# ============================================================
+
+from db.db import (
+    upsert_book,
+    _raw_get_book,
+    _raw_get_all_books,
+)
+
+
+# ------------------------------------------------------------
+# FETCH OPERATIONS
+# ------------------------------------------------------------
+def fetch_book(book_id):
+    """Return a single book dict or None."""
+    return _raw_get_book(book_id)
+
+
+def fetch_all_books():
+    """Return all books ordered newest → oldest."""
+    return _raw_get_all_books()
+
+
+# ------------------------------------------------------------
+# BOOK CREATION / METADATA
+# ------------------------------------------------------------
+def create_or_update_book(
+    book_id,
+    title=None,
+    author=None,
+    chapters_total=None,
+    cover_url=None,
+    cover_path=None,
+    status=None,
+):
+    fields = {}
+
+    if title is not None:
+        fields["title"] = title
+    if author is not None:
+        fields["author"] = author
+    if chapters_total is not None:
+        fields["chapters_total"] = chapters_total
+    if cover_url is not None:
+        fields["cover_url"] = cover_url
+    if cover_path is not None:
+        fields["cover_path"] = cover_path
+    if status is not None:
+        fields["status"] = status
+
+    if fields:
+        upsert_book(book_id, **fields)
+
+
+# ------------------------------------------------------------
+# STATUS MANAGEMENT
+# ------------------------------------------------------------
+def set_status(book_id, status):
+    upsert_book(book_id, status=status)
+
+
+# ------------------------------------------------------------
+# INCREMENTING COUNTERS (read-modify-write; NOT atomic under concurrency)
+# ------------------------------------------------------------
+def inc_downloaded(book_id, amount=1):
+    book = _raw_get_book(book_id)
+    if not book:
+        return
+    cur = book.get("downloaded", 0) or 0
+    upsert_book(book_id, downloaded=cur + amount)
+
+
+def inc_parsed(book_id, amount=1):
+    book = _raw_get_book(book_id)
+    if not book:
+        return
+    cur = book.get("parsed", 0) or 0
+    upsert_book(book_id, parsed=cur + amount)
+
+
+def inc_audio_done(book_id, amount=1):
+    book = _raw_get_book(book_id)
+    if not book:
+        return
+    cur = book.get("audio_done", 0) or 0
+    upsert_book(book_id, audio_done=cur + amount)
diff --git a/bookscraper/scraper/download_controller.py b/bookscraper/scraper/download_controller.py
index bb2b57a..f3ad53f 100644
--- a/bookscraper/scraper/download_controller.py
+++ b/bookscraper/scraper/download_controller.py
@@ -16,6 +16,7 @@ import os
 import requests
 import shutil
 from scraper.abort import abort_requested  # DEBUG allowed
+from db.repository import create_or_update_book
 
 # NEW: Redis State Model (C&U)
 from scraper.progress import (
@@ -133,6 +134,7 @@
             return
 
         try:
+
            for entry in os.listdir(self.book_base):
                if entry.lower().startswith("volume_"):
                    vol_dir = os.path.join(self.book_base, entry)
@@ -144,6 +146,29 @@
        except Exception as e:
            log(f"[CTRL] Cover replication failed: {e}")
 
+    def store_cover_in_static(self):
+        """
+        Copy the main cover.jpg from book_base into static/covers/<book_id>.jpg.
+        This allows the Flask web UI to serve the cover directly.
+        """
+
+        src = os.path.join(self.book_base, "cover.jpg")
+        if not os.path.exists(src):
+            log("[CTRL] No cover.jpg found, cannot store in static/covers")
+            return
+
+        # static/covers/<book_id>.jpg
+        static_dir = os.path.join("static", "covers")
+        os.makedirs(static_dir, exist_ok=True)
+
+        dst = os.path.join(static_dir, f"{self.book_id}.jpg")
+
+        try:
+            shutil.copyfile(src, dst)
+            log(f"[CTRL] Cover stored for UI: {dst}")
+        except Exception as e:
+            log(f"[CTRL] Failed to store cover in static: {e}")
+
     # ---------------------------------------------------------
     # Volume isolation
     # ---------------------------------------------------------
@@ -225,7 +250,7 @@
 
         # -------------------------------------------------------
         self.replicate_cover_to_volumes()
-
+        self.store_cover_in_static()
         # -------------------------------------------------------
         try:
             generate_all_scripts(
diff --git a/bookscraper/scraper/scriptgen.py b/bookscraper/scraper/scriptgen.py
index 4b714cf..bd9c148 100644
--- a/bookscraper/scraper/scriptgen.py
+++ b/bookscraper/scraper/scriptgen.py
@@ -35,7 +35,7 @@ def detect_volumes(book_base: str):
        except Exception:
            continue
    vols.sort()
-    return [v[0] for v in vols]
+    return vols
 
 
 # ------------------------------------------------------------
@@ -43,12 +43,12 @@ def detect_volumes(book_base: str):
 # ------------------------------------------------------------
 def build_merge_block(title: str, author: str, volumes):
     lines = []
-    for vol in volumes:
+    for num, dirname in volumes:
         line = (
             f'm4b-tool merge --jobs=4 --writer="{author}" '
             f'--albumartist="{author}" --album="{title}" '
-            f'--name="{title}" --output-file="{title}-{vol}.m4b" '
-            f'"{vol}" -vvv'
+            f'--name="{title}" --output-file="{title}-{num}.m4b" '
+            f'"{dirname}" -vvv'
         )
         lines.append(line)
 
diff --git a/bookscraper/scraper/tasks/audio_tasks.py b/bookscraper/scraper/tasks/audio_tasks.py
index 73a2991..4e23ce2 100644
--- a/bookscraper/scraper/tasks/audio_tasks.py
+++ b/bookscraper/scraper/tasks/audio_tasks.py
@@ -7,8 +7,8 @@ from logbus.publisher import log
 import os
 import subprocess
 import time
-
 from scraper.progress import inc_audio_done, inc_audio_skipped
+from db.repository import inc_audio_done
 from scraper.abort import abort_requested
 from redis import Redis
 from urllib.parse import urlparse
@@ -134,7 +134,7 @@ def generate_audio(
     os.makedirs(base_dir, exist_ok=True)
 
     safe_num = f"{chapter_number:04d}"
-    audio_file = os.path.join(base_dir, f"{safe_num}.m4a")
+    audio_file = os.path.join(base_dir, f"{safe_num}.m4b")
 
     if os.path.exists(audio_file):
         log(f"[AUDIO] Skip CH{chapter_number} → already exists")
diff --git a/bookscraper/scraper/tasks/download_tasks.py b/bookscraper/scraper/tasks/download_tasks.py
index 641c295..d3f3785 100644
--- a/bookscraper/scraper/tasks/download_tasks.py
+++ b/bookscraper/scraper/tasks/download_tasks.py
@@ -18,6 +18,7 @@ from scraper.progress import (
     inc_chapter_done,
     inc_chapter_download_skipped,
 )
+from db.repository import inc_downloaded, set_status
 from logbus.publisher import log
 from scraper.ui_log import push_ui
 
diff --git a/bookscraper/scraper/templates/say.template b/bookscraper/scraper/templates/say.template
new file mode 100644
index 0000000..9b35303
--- /dev/null
+++ b/bookscraper/scraper/templates/say.template
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+main_dir="$( cd "$( dirname "$0" )" && pwd )"
+
+shopt -s nocasematch
+
+for subfolder in "$main_dir"/*; do
+    if [ -d "$subfolder" ]; then
+        audiofolder="$subfolder/Audio"
+        mkdir -p "$audiofolder"
+
+        for entry in "$subfolder"/*.txt; do
+            fn=$(basename "$entry")
+            [[ "${entry##*.}" =~ txt ]]
+
+            echo "$fn"
+            inputfile="$subfolder/$fn"
+            outputfile="$audiofolder/${fn%.*}.m4b"
+
+            now=$(date +"%T")
+            echo "Current time : $now"
+            echo "$inputfile ->"
+            echo "$outputfile"
+
+            if [ -f "$outputfile" ]; then
+                echo "$outputfile exists: skipping"
+            else
+                say --voice=Sinji \
+                    --output-file="$outputfile" \
+                    --input-file="$inputfile" \
+                    --file-format=m4bf \
+                    --quality=127 \
+                    -r 200 \
+                    --data-format=aac
+            fi
+        done
+    fi
+done
diff --git a/bookscraper/static/js/log_view.js b/bookscraper/static/js/log_view.js
index a65b271..ba4cff7 100644
--- a/bookscraper/static/js/log_view.js
+++ b/bookscraper/static/js/log_view.js
@@ -16,7 +16,7 @@ console.log(">>> log_view.js LOADING…");
    --------------------------------------------------------- */
 let LOG_FILTER = "ALL";
 let LAST_LOG_INDEX = -1; // For delta polling
-const MAX_LOG_LINES = 2000; // Rolling cap to prevent freezing
+const MAX_LOG_LINES = 1000; // Rolling cap to prevent freezing
 
 function applyLogFilter() {
   console.log(">>> log_view.js applyLogFilter(), filter =", LOG_FILTER);
@@ -49,11 +49,11 @@ document.addEventListener("DOMContentLoaded", () => {
   console.log(">>> log_view.js: log viewer detected.");
 
   // Filter dropdown
-  filterSel.addEventListener("change", () => {
-    LOG_FILTER = filterSel.value;
-    console.log(">>> log_view.js filter changed to:", LOG_FILTER);
-    applyLogFilter();
-  });
+  // filterSel.addEventListener("change", () => {
+  //   LOG_FILTER = filterSel.value;
+  //   console.log(">>> log_view.js filter changed to:", LOG_FILTER);
+  //   applyLogFilter();
+  // });
 
   // Clear log window
   if (clearBtn) {
diff --git a/bookscraper/templates/dashboard/book_detail.html b/bookscraper/templates/dashboard/book_detail.html
index 342be63..54001d5 100644
--- a/bookscraper/templates/dashboard/book_detail.html
+++ b/bookscraper/templates/dashboard/book_detail.html
@@ -29,8 +29,6 @@
-
-
@@ -38,7 +36,4 @@
-
-
-
 {% endblock %}