You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/db/db.py

120 lines
3.2 KiB

# ============================================================
# File: db/db.py
# Purpose:
# Raw SQLite engine for BookScraper.
# Provides ONLY low-level DB primitives.
# - Connection management (per-process cached connection; journal mode is set in enable_wal_mode(), currently DELETE)
# - init_db() schema creation
# - upsert_book() atomic write
# - raw fetch helpers (private)
#
# All business logic belongs in repository.py.
# ============================================================
import os
import sqlite3
from threading import Lock
# Location of the SQLite file; overridable through the BOOKSCRAPER_DB env var.
DB_PATH = os.environ.get("BOOKSCRAPER_DB", "/app/data/books.db")
# Ensure the parent directory exists. A bare filename such as "books.db" has an
# empty dirname, and os.makedirs("") raises FileNotFoundError, so only create
# the directory when there actually is one to create.
_db_dir = os.path.dirname(DB_PATH)
if _db_dir:
    os.makedirs(_db_dir, exist_ok=True)
# Per-process connection cache (keyed by process id in get_db)
_connection_cache = {}
# Serializes creation of new cache entries
_connection_lock = Lock()
# ------------------------------------------------------------
# Connection handling
# ------------------------------------------------------------
def get_db():
    """Return the SQLite connection cached for the current process.

    The connection is created on first use, configured with
    ``sqlite3.Row`` as its row factory, passed through ``enable_wal_mode()``
    and stored in ``_connection_cache`` under the current process id.

    Returns:
        sqlite3.Connection: the cached connection for ``os.getpid()``.
    """
    pid = os.getpid()

    # Fast path: connection already created for this process.
    conn = _connection_cache.get(pid)
    if conn is not None:
        return conn

    with _connection_lock:
        # Re-check under the lock: another thread may have created the
        # connection between the lookup above and acquiring the lock.
        # Without this, both threads would open a connection and one
        # would be overwritten in the cache and never closed.
        conn = _connection_cache.get(pid)
        if conn is None:
            conn = sqlite3.connect(DB_PATH, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            enable_wal_mode(conn)
            _connection_cache[pid] = conn
    return conn
def enable_wal_mode(conn):
    """Apply the journal-mode and synchronous PRAGMAs to ``conn``, then commit.

    NOTE(review): despite the function name, the journal mode requested here
    is DELETE, not WAL -- confirm which of the two is actually intended.

    Args:
        conn: an open sqlite3 connection.
    """
    pragma_statements = (
        "PRAGMA journal_mode=DELETE;",
        "PRAGMA synchronous=NORMAL;",
    )
    for statement in pragma_statements:
        conn.execute(statement)
    conn.commit()
# ------------------------------------------------------------
# Schema creation
# ------------------------------------------------------------
def init_db():
    """Create the ``books`` table if it does not exist yet, then commit.

    Uses the connection returned by ``get_db()``. Because of
    ``IF NOT EXISTS`` the statement is a no-op when the table is already there.
    """
    create_books_table = """
        CREATE TABLE IF NOT EXISTS books (
            book_id TEXT PRIMARY KEY,
            title TEXT,
            author TEXT,
            cover_url TEXT,
            cover_path TEXT,
            chapters_total INTEGER,
            status TEXT,
            downloaded INTEGER DEFAULT 0,
            parsed INTEGER DEFAULT 0,
            audio_done INTEGER DEFAULT 0,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            last_update DATETIME
        );
    """
    db = get_db()
    db.execute(create_books_table)
    db.commit()
# ------------------------------------------------------------
# WRITE OPERATIONS
# ------------------------------------------------------------
def upsert_book(book_id, **fields):
    """
    Raw upsert primitive. Repository layer should call this.

    Inserts a row into ``books`` for ``book_id``. If a row with that
    ``book_id`` already exists, every column passed in ``fields`` is
    overwritten with the new value and ``last_update`` is set to
    CURRENT_TIMESTAMP. Calling with no ``fields`` is valid: it inserts a bare
    row, or just refreshes ``last_update`` on an existing one.

    Args:
        book_id: primary-key value of the row.
        **fields: column name -> value pairs to write.

    Raises:
        ValueError: if a column name is not a plain ASCII identifier.
    """
    # Column names are interpolated into the SQL text (they cannot be bound
    # as parameters), so reject anything that is not a simple identifier.
    for column in fields:
        if not (column.isascii() and column.isidentifier()):
            raise ValueError(f"Invalid column name: {column!r}")

    conn = get_db()
    columns = ["book_id", *fields]
    values = [book_id, *fields.values()]
    column_list = ",".join(columns)
    placeholders = ",".join(["?"] * len(values))

    # Build the SET clause from a list so that an empty ``fields`` does not
    # produce a dangling leading comma ("SET , last_update = ...").
    assignments = [f"{column} = excluded.{column}" for column in fields]
    assignments.append("last_update = CURRENT_TIMESTAMP")
    set_clause = ", ".join(assignments)

    sql = f"""
        INSERT INTO books ({column_list})
        VALUES ({placeholders})
        ON CONFLICT(book_id)
        DO UPDATE SET {set_clause};
    """
    conn.execute(sql, values)
    conn.commit()
# ------------------------------------------------------------
# RAW READ OPERATIONS (PRIVATE)
# ------------------------------------------------------------
def _raw_get_book(book_id):
    """Look up a single ``books`` row by ``book_id``.

    Returns:
        The row converted to a plain ``dict``, or ``None`` when no row matches.
    """
    cursor = get_db().execute(
        "SELECT * FROM books WHERE book_id = ?;",
        (book_id,),
    )
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
def _raw_get_all_books():
    """Return every ``books`` row as a list of plain dicts.

    Rows are ordered by ``created_at`` descending, as specified in the query.
    """
    cursor = get_db().execute("SELECT * FROM books ORDER BY created_at DESC;")
    # Iterating the cursor yields the same rows fetchall() would return.
    return [dict(record) for record in cursor]