You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/utils/state_sync.py

142 lines
4.0 KiB

# ============================================================
# File: scraper/utils/state_sync.py
# Purpose:
# State inspection + optional sync logic for book progress.
# This version provides:
# • inspect_books_state() → NO writes, just a dry-run
# • sync_books_from_redis() → NOT USED YET (defined below; writes to SQLite if called)
# ============================================================
import os
import redis
from db.db import get_db
def inspect_books_state():
    """
    Build a dry-run comparison of SQLite book rows and their Redis progress.

    Reads every row of the ``books`` table and, for each one, the Redis
    hash ``book:<book_id>:state``. Nothing is written to either store.

    When a Redis hash exists, its fields ``download_done``, ``parsed_done``,
    ``audio_done``, ``chapters_total`` and ``status`` override the SQLite
    columns ``downloaded``, ``parsed``, ``audio_done``, ``chapters_total``
    and ``status`` in the merged preview. A field missing from the hash
    keeps the SQLite value; a counter that is missing or NULL on both
    sides is reported as 0. Books without a Redis hash are previewed
    unchanged.

    Returns:
        A list with one dict per book containing ``book_id``, ``sqlite``
        (the row as a dict), ``redis`` (the decoded hash, ``{}`` when the
        key does not exist) and ``would_merge_to`` (the merged preview).

    Raises:
        ValueError: if a counter value cannot be parsed as an integer.
    """
    # (SQLite column, Redis hash field) pairs for the integer counters.
    counter_fields = (
        ("downloaded", "download_done"),
        ("parsed", "parsed_done"),
        ("audio_done", "audio_done"),
        ("chapters_total", "chapters_total"),
    )

    def _text(value):
        # redis-py returns bytes unless decode_responses is enabled on the
        # connection (it can be switched on through the URL), so accept both.
        return value.decode() if isinstance(value, bytes) else value

    def _counter(state, field, fallback):
        # A NULL SQLite column arrives as None; int(None) would raise
        # TypeError and abort the whole inspection, so treat it as 0.
        raw = state.get(field, fallback)
        return 0 if raw is None else int(raw)

    r = redis.Redis.from_url(os.getenv("REDIS_BROKER"))
    db = get_db()
    cur = db.cursor()
    cur.execute("SELECT * FROM books")
    rows = cur.fetchall()

    results = []
    for row in rows:
        # Rows must support both row["col"] and dict(row) -- presumably
        # get_db() configures sqlite3.Row as the row factory; confirm.
        book_id = row["book_id"]
        sqlite_row = dict(row)

        # Read redis state; hgetall yields an empty mapping for a missing key.
        progress = r.hgetall(f"book:{book_id}:state")
        decoded = {_text(k): _text(v) for k, v in progress.items()}

        # Determine dry-run merged result (Redis wins where it has a value).
        merged = sqlite_row.copy()
        if decoded:
            for column, field in counter_fields:
                merged[column] = _counter(decoded, field, merged.get(column, 0))
            merged["status"] = decoded.get(
                "status", merged.get("status", "unknown")
            )

        results.append(
            {
                "book_id": book_id,
                "sqlite": sqlite_row,
                "redis": decoded,
                "would_merge_to": merged,
            }
        )
    return results
def sync_books_from_redis():
    """
    Copy Redis progress counters into the SQLite ``books`` table.

    Reads every row of ``books`` and, for each one, the Redis hash
    ``book:<book_id>:state``. When the hash exists, the row's
    ``downloaded``, ``parsed``, ``audio_done``, ``chapters_total`` and
    ``status`` columns are updated from the hash fields ``download_done``,
    ``parsed_done``, ``audio_done``, ``chapters_total`` and ``status``,
    and ``last_update`` is set to ``datetime('now')``.

    A field missing from the hash keeps the current SQLite value instead
    of being reset, so a partially populated hash cannot wipe stored
    progress; a counter that is missing or NULL on both sides is written
    as 0. Books without a Redis hash are left untouched.

    Returns:
        A list with one dict per book containing ``book_id``, ``before``
        (the row prior to the sync), ``redis`` (the decoded hash, ``{}``
        when the key does not exist) and ``after`` (the row re-read after
        the update, or the ``before`` dict itself when nothing was written).

    Raises:
        ValueError: if a counter value cannot be parsed as an integer.
    """

    def _text(value):
        # redis-py returns bytes unless decode_responses is enabled on the
        # connection (it can be switched on through the URL), so accept both.
        return value.decode() if isinstance(value, bytes) else value

    def _counter(state, field, fallback):
        # Prefer the Redis value, fall back to the stored SQLite value, and
        # map NULL/None to 0 so int() never receives None.
        raw = state.get(field, fallback)
        return 0 if raw is None else int(raw)

    r = redis.Redis.from_url(os.getenv("REDIS_BROKER"))
    db = get_db()
    cur = db.cursor()

    # Fetch all books.
    cur.execute("SELECT * FROM books")
    rows = cur.fetchall()

    results = []
    for row in rows:
        # Rows must support both row["col"] and dict(row) -- presumably
        # get_db() configures sqlite3.Row as the row factory; confirm.
        book_id = row["book_id"]
        before = dict(row)

        progress = r.hgetall(f"book:{book_id}:state")
        if not progress:
            # No Redis state for this book: report it unchanged.
            results.append(
                {"book_id": book_id, "before": before, "redis": {}, "after": before}
            )
            continue

        # Decode the Redis hash into a plain string dictionary.
        decoded = {_text(k): _text(v) for k, v in progress.items()}

        # Extract counters, keeping the SQLite value for absent fields.
        downloaded = _counter(decoded, "download_done", before.get("downloaded"))
        parsed = _counter(decoded, "parsed_done", before.get("parsed"))
        audio_done = _counter(decoded, "audio_done", before.get("audio_done"))
        chapters_total = _counter(
            decoded, "chapters_total", before.get("chapters_total")
        )

        # Redis status wins; otherwise the stored status is kept.
        status = decoded.get("status", before["status"])

        # Write back to SQLite.
        cur.execute(
            """
            UPDATE books
            SET downloaded = ?, parsed = ?, audio_done = ?, chapters_total = ?, status = ?, last_update = datetime('now')
            WHERE book_id = ?
            """,
            (downloaded, parsed, audio_done, chapters_total, status, book_id),
        )
        # Committed per book, so rows already synced stay persisted even if
        # a later book raises.
        db.commit()

        # Fetch the updated row so the caller sees what was actually stored.
        cur.execute("SELECT * FROM books WHERE book_id = ?", (book_id,))
        after = dict(cur.fetchone())

        results.append(
            {"book_id": book_id, "before": before, "redis": decoded, "after": after}
        )
    return results