create m4b instead of m4a

feat/dashboard-upgrade
peter.fong 2 weeks ago
parent 5159c32f58
commit 7439d26744

@ -8,6 +8,9 @@ load_dotenv()
print(">>> [WEB] Importing celery_app …")
from celery_app import celery_app
from db.db import init_db
init_db() # ensure DB schema exists before Flask starts
from flask import Flask, render_template, request, jsonify
from scraper.logger import log_debug

@ -5,6 +5,9 @@ from dotenv import load_dotenv
print(">>> [celery_app] Loading .env BEFORE initializing Celery...")
load_dotenv()
from db.db import init_db
init_db() # ensures DB exists for all workers
BROKER = os.getenv("REDIS_BROKER")
BACKEND = os.getenv("REDIS_BACKEND")

@ -0,0 +1,119 @@
# ============================================================
# File: db/db.py
# Purpose:
# Raw SQLite engine for BookScraper.
# Provides ONLY low-level DB primitives.
# - Connection management (WAL mode)
# - init_db() schema creation
# - upsert_book() atomic write
# - raw fetch helpers (private)
#
# All business logic belongs in repository.py.
# ============================================================
import os
import sqlite3
from threading import Lock
# Database location; override with BOOKSCRAPER_DB for tests / local runs.
DB_PATH = os.environ.get("BOOKSCRAPER_DB", "/app/data/books.db")
# Ensure the parent directory exists before any connection is opened.
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
# One SQLite connection per process, keyed by PID, so forked workers each
# get their own handle instead of sharing a parent's connection.
_connection_cache = {}
_connection_lock = Lock()
# ------------------------------------------------------------
# Connection handling
# ------------------------------------------------------------
def get_db():
    """Return this process's cached SQLite connection, creating it lazily.

    One connection per PID. ``check_same_thread=False`` lets threads within
    the process share the handle; ``row_factory = sqlite3.Row`` makes rows
    addressable by column name (the raw fetch helpers rely on this).

    Returns:
        sqlite3.Connection: the per-process connection (WAL mode enabled).
    """
    pid = os.getpid()
    if pid not in _connection_cache:
        with _connection_lock:
            # Re-check under the lock: another thread may have created the
            # connection between our unlocked check and acquiring the lock.
            # Without this re-check, both threads would connect and one
            # connection would be silently leaked.
            if pid not in _connection_cache:
                conn = sqlite3.connect(DB_PATH, check_same_thread=False)
                conn.row_factory = sqlite3.Row
                enable_wal_mode(conn)
                _connection_cache[pid] = conn
    return _connection_cache[pid]
def enable_wal_mode(conn):
    """Switch *conn* to WAL journaling with NORMAL synchronous writes.

    WAL allows concurrent readers alongside a single writer, which suits
    the multi-process web + worker deployment; NORMAL synchronous trades a
    little durability for faster commits.
    """
    for pragma in ("PRAGMA journal_mode=WAL;", "PRAGMA synchronous=NORMAL;"):
        conn.execute(pragma)
    conn.commit()
# ------------------------------------------------------------
# Schema creation
# ------------------------------------------------------------
def init_db():
    """Create the ``books`` table if it does not already exist.

    Idempotent (``CREATE TABLE IF NOT EXISTS``); the web app and every
    worker call this at startup, so repeated calls must be harmless.
    """
    conn = get_db()
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS books (
            book_id TEXT PRIMARY KEY,
            title TEXT,
            author TEXT,
            cover_url TEXT,
            cover_path TEXT,
            chapters_total INTEGER,
            status TEXT,
            downloaded INTEGER DEFAULT 0,
            parsed INTEGER DEFAULT 0,
            audio_done INTEGER DEFAULT 0,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            last_update DATETIME
        );
        """
    )
    conn.commit()
# ------------------------------------------------------------
# WRITE OPERATIONS
# ------------------------------------------------------------
def upsert_book(book_id, **fields):
    """Raw upsert primitive. Repository layer should call this.

    Inserts a row keyed by *book_id*, or on conflict updates exactly the
    supplied columns; ``last_update`` is always refreshed. Column names are
    interpolated into the SQL text, so they are validated against a
    whitelist of schema columns — values themselves are bound as
    parameters and never interpolated.

    Raises:
        ValueError: if any field name is not a known ``books`` column.
    """
    # Must mirror the schema in init_db() (book_id / last_update excluded:
    # one is the key, the other is managed automatically here).
    allowed = {
        "title", "author", "cover_url", "cover_path", "chapters_total",
        "status", "downloaded", "parsed", "audio_done", "created_at",
    }
    unknown = set(fields) - allowed
    if unknown:
        raise ValueError(f"Unknown book column(s): {sorted(unknown)}")

    conn = get_db()
    keys = ["book_id"] + list(fields)
    values = [book_id] + list(fields.values())
    placeholders = ",".join("?" * len(values))
    # Always refresh last_update; with no fields this degrades gracefully to
    # a timestamp-only update (the original built "SET , last_update" —
    # a SQL syntax error — when fields was empty).
    set_clauses = [f"{k} = excluded.{k}" for k in fields]
    set_clauses.append("last_update = CURRENT_TIMESTAMP")
    sql = f"""
        INSERT INTO books ({','.join(keys)})
        VALUES ({placeholders})
        ON CONFLICT(book_id)
        DO UPDATE SET {', '.join(set_clauses)};
    """
    conn.execute(sql, values)
    conn.commit()
# ------------------------------------------------------------
# RAW READ OPERATIONS (PRIVATE)
# ------------------------------------------------------------
def _raw_get_book(book_id):
    """Fetch one book row as a plain dict, or None when no row matches."""
    cursor = get_db().execute("SELECT * FROM books WHERE book_id = ?;", (book_id,))
    row = cursor.fetchone()
    if row is None:
        return None
    return dict(row)
def _raw_get_all_books():
    """Return every book row as a list of dicts, newest first."""
    rows = get_db().execute("SELECT * FROM books ORDER BY created_at DESC;").fetchall()
    return list(map(dict, rows))

@ -0,0 +1,97 @@
# ============================================================
# File: db/repository.py
# Purpose:
# High-level BookScraper database interface.
# This is the ONLY module Celery tasks and Flask should use.
#
# Uses low-level primitives from db.db, but exposes
# domain-level operations:
# - fetch_book / fetch_all_books
# - create_or_update_book
# - set_status
# - incrementing counters
# ============================================================
from db.db import (
upsert_book,
_raw_get_book,
_raw_get_all_books,
)
# ------------------------------------------------------------
# FETCH OPERATIONS
# ------------------------------------------------------------
def fetch_book(book_id):
    """Return the book row for *book_id* as a dict, or None if unknown."""
    return _raw_get_book(book_id)
def fetch_all_books():
    """Return all books as dicts, ordered newest → oldest (by created_at)."""
    return _raw_get_all_books()
# ------------------------------------------------------------
# BOOK CREATION / METADATA
# ------------------------------------------------------------
def create_or_update_book(
    book_id,
    title=None,
    author=None,
    chapters_total=None,
    cover_url=None,
    cover_path=None,
    status=None,
):
    """Insert or update a book's metadata.

    Only arguments that are not None are written, so callers can update a
    subset of columns without clobbering the rest. Passing nothing besides
    *book_id* is a no-op (no row is created or touched).
    """
    candidates = {
        "title": title,
        "author": author,
        "chapters_total": chapters_total,
        "cover_url": cover_url,
        "cover_path": cover_path,
        "status": status,
    }
    fields = {column: value for column, value in candidates.items() if value is not None}
    if fields:
        upsert_book(book_id, **fields)
# ------------------------------------------------------------
# STATUS MANAGEMENT
# ------------------------------------------------------------
def set_status(book_id, status):
    """Set the pipeline status string for *book_id* (upsert: creates the
    row if it does not exist yet)."""
    upsert_book(book_id, status=status)
# ------------------------------------------------------------
# INCREMENTING COUNTERS (read-modify-write — NOT atomic under concurrency)
# ------------------------------------------------------------
def _inc_counter(book_id, column, amount):
    """Read-modify-write increment of a single counter column.

    Silently does nothing when the book does not exist. NOTE(review): this
    is NOT atomic — the read and the write are separate statements, so
    concurrent increments for the same book can lose updates. Tolerable
    while each book is processed by one worker at a time; revisit if that
    assumption changes.
    """
    book = _raw_get_book(book_id)
    if not book:
        return
    current = book.get(column, 0) or 0  # `or 0` maps NULL → 0
    upsert_book(book_id, **{column: current + amount})


def inc_downloaded(book_id, amount=1):
    """Increase the downloaded-chapters counter (default +1)."""
    _inc_counter(book_id, "downloaded", amount)


def inc_parsed(book_id, amount=1):
    """Increase the parsed-chapters counter (default +1)."""
    _inc_counter(book_id, "parsed", amount)


def inc_audio_done(book_id, amount=1):
    """Increase the audio-complete counter (default +1)."""
    _inc_counter(book_id, "audio_done", amount)

@ -16,6 +16,7 @@ import os
import requests
import shutil
from scraper.abort import abort_requested # DEBUG allowed
from db.repository import create_or_update_book
# NEW: Redis State Model (C&U)
from scraper.progress import (
@ -133,6 +134,7 @@ class DownloadController:
return
try:
for entry in os.listdir(self.book_base):
if entry.lower().startswith("volume_"):
vol_dir = os.path.join(self.book_base, entry)
@ -144,6 +146,29 @@ class DownloadController:
except Exception as e:
log(f"[CTRL] Cover replication failed: {e}")
def store_cover_in_static(self):
    """Expose the book's main cover to the web UI.

    Copies <book_base>/cover.jpg to static/covers/<book_id>.jpg so Flask
    can serve it as a static asset. Logs and returns quietly when the
    source cover is missing; copy failures are logged, never raised.
    """
    source = os.path.join(self.book_base, "cover.jpg")
    if not os.path.exists(source):
        log("[CTRL] No cover.jpg found, cannot store in static/covers")
        return
    covers_dir = os.path.join("static", "covers")
    os.makedirs(covers_dir, exist_ok=True)
    destination = os.path.join(covers_dir, f"{self.book_id}.jpg")
    try:
        shutil.copyfile(source, destination)
        log(f"[CTRL] Cover stored for UI: {destination}")
    except Exception as e:
        log(f"[CTRL] Failed to store cover in static: {e}")
# ---------------------------------------------------------
# Volume isolation
# ---------------------------------------------------------
@ -225,7 +250,7 @@ class DownloadController:
# -------------------------------------------------------
self.replicate_cover_to_volumes()
self.store_cover_in_static()
# -------------------------------------------------------
try:
generate_all_scripts(

@ -35,7 +35,7 @@ def detect_volumes(book_base: str):
except Exception:
continue
vols.sort()
return [v[0] for v in vols]
return vols
# ------------------------------------------------------------
@ -43,12 +43,12 @@ def detect_volumes(book_base: str):
# ------------------------------------------------------------
def build_merge_block(title: str, author: str, volumes):
lines = []
for vol in volumes:
for num, dirname in volumes:
line = (
f'm4b-tool merge --jobs=4 --writer="{author}" '
f'--albumartist="{author}" --album="{title}" '
f'--name="{title}" --output-file="{title}-{vol}.m4b" '
f'"{vol}" -vvv'
f'--name="{title}" --output-file="{title}-{num}.m4b" '
f'"{dirname}" -vvv'
)
lines.append(line)

@ -7,8 +7,8 @@ from logbus.publisher import log
import os
import subprocess
import time
from scraper.progress import inc_audio_done, inc_audio_skipped
from db.repository import inc_audio_done
from scraper.abort import abort_requested
from redis import Redis
from urllib.parse import urlparse
@ -134,7 +134,7 @@ def generate_audio(
os.makedirs(base_dir, exist_ok=True)
safe_num = f"{chapter_number:04d}"
audio_file = os.path.join(base_dir, f"{safe_num}.m4a")
audio_file = os.path.join(base_dir, f"{safe_num}.m4b")
if os.path.exists(audio_file):
log(f"[AUDIO] Skip CH{chapter_number} → already exists")

@ -18,6 +18,7 @@ from scraper.progress import (
inc_chapter_done,
inc_chapter_download_skipped,
)
from db.repository import inc_downloaded, set_status
from logbus.publisher import log
from scraper.ui_log import push_ui

@ -0,0 +1,38 @@
#!/bin/bash
# Batch-convert chapter .txt files to .m4b audiobooks with macOS `say`.
# For each subfolder of the script's directory, every *.txt is rendered
# into <subfolder>/Audio/<name>.m4b; existing outputs are skipped.
#
# Fixes vs. previous revision:
#  - shebang was /bin/sh but the script used bash-only features
#    (`shopt`, `[[ ]]`); plain sh would fail on `shopt`.
#  - the `[[ "${entry##*.}" =~ txt ]]` line discarded its result (no-op)
#    and has been removed along with the now-unused `shopt -s nocasematch`.
#  - an unmatched *.txt glob used to iterate the literal pattern; guarded
#    with a -f test.
main_dir="$( cd "$( dirname "$0" )" && pwd )"

for subfolder in "$main_dir"/*; do
    [ -d "$subfolder" ] || continue
    audiofolder="$subfolder/Audio"
    mkdir -p "$audiofolder"
    for inputfile in "$subfolder"/*.txt; do
        # Skip the literal "*.txt" left by an unmatched glob.
        [ -f "$inputfile" ] || continue
        fn=$(basename "$inputfile")
        echo "$fn"
        outputfile="$audiofolder/${fn%.*}.m4b"
        now=$(date +"%T")
        echo "Current time : $now"
        echo "$inputfile ->"
        echo "$outputfile"
        if [ -f "$outputfile" ]; then
            echo "$outputfile exists: skipping"
        else
            # m4bf = MPEG-4 audiobook container, matching the .m4b extension.
            say --voice=Sinji \
                --output-file="$outputfile" \
                --input-file="$inputfile" \
                --file-format=m4bf \
                --quality=127 \
                -r 200 \
                --data-format=aac
        fi
    done
done

@ -16,7 +16,7 @@ console.log(">>> log_view.js LOADING…");
--------------------------------------------------------- */
let LOG_FILTER = "ALL";
let LAST_LOG_INDEX = -1; // For delta polling
const MAX_LOG_LINES = 2000; // Rolling cap to prevent freezing
const MAX_LOG_LINES = 1000; // Rolling cap to prevent freezing
function applyLogFilter() {
console.log(">>> log_view.js applyLogFilter(), filter =", LOG_FILTER);
@ -49,11 +49,11 @@ document.addEventListener("DOMContentLoaded", () => {
console.log(">>> log_view.js: log viewer detected.");
// Filter dropdown
filterSel.addEventListener("change", () => {
LOG_FILTER = filterSel.value;
console.log(">>> log_view.js filter changed to:", LOG_FILTER);
applyLogFilter();
});
// filterSel.addEventListener("change", () => {
// LOG_FILTER = filterSel.value;
// console.log(">>> log_view.js filter changed to:", LOG_FILTER);
// applyLogFilter();
// });
// Clear log window
if (clearBtn) {

@ -29,8 +29,6 @@
<script>
const BOOK_ID = "{{ book_id }}";
</script>
<script src="/static/js/helpers.js"></script>
<!-- Shared log viewer -->
<script src="/static/js/log_view.js"></script>
@ -38,7 +36,4 @@
<script src="/static/js/dashboard.js"></script>
<!-- Existing global app logic -->
<script src="/static/js/progress.js"></script>
<script src="/static/js/app.js"></script>
{% endblock %}

Loading…
Cancel
Save