# ============================================================
# File: scraper/services/init_service.py
# Purpose:
# Orchestrate INIT-flow:
# - resolve site
# - fetch minimal metadata
# - derive book_id
# - register in SQLite
# - store main cover
# ============================================================
import re
from scraper.services.site_resolver import SiteResolver
from scraper.services.scrape_engine import ScrapeEngine
from scraper.services.cover_service import CoverService
from db.repository import register_book


class InitService:
    @staticmethod
    def derive_book_id(url: str) -> str:
        """
        PTWXZ URL format ends with /{id}.html.
        If no match → fallback to sanitized URL.
        """
        m = re.search(r"/(\d+)\.html$", url)
        if m:
            return m.group(1)
        return url.replace("/", "_")
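
    # Illustrative behaviour, assuming a typical "/{id}.html" URL shape
    # (the URLs below are placeholders, not real PTWXZ links):
    #   derive_book_id("https://example.com/book/12345.html") -> "12345"
    #   derive_book_id("https://example.com/book/index")      -> "https:__example.com_book_index"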

    @staticmethod
    def execute(url: str) -> dict:
        """
        Main INIT-flow entry point.
        Returns complete metadata + registration info.
        """
        # 1) Determine which BookSite applies
        site = SiteResolver.resolve(url)

        # 2) Metadata only (no chapters)
        meta = ScrapeEngine.fetch_metadata_only(site, url)
        title = meta.get("title") or "Unknown"
        author = meta.get("author")
        description = meta.get("description")
        cover_url = meta.get("cover_url")

        # 3) Determine book_id
        book_id = InitService.derive_book_id(url)

        # 4) SQLite registration
        register_book(
            book_id=book_id,
            title=title,
            author=author,
            description=description,
            cover_url=cover_url,
        )

        # 5) Download UI cover
        CoverService.download_main_cover(cover_url, book_id)

        # 6) Structured output for UI
        return {
            "book_id": book_id,
            "title": title,
            "author": author,
            "description": description,
            "cover_url": cover_url,
            "status": "registered",
        }
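

# ============================================================
# Minimal usage sketch. Assumes the project services imported
# above are available and configured; the URL is a placeholder,
# not a real PTWXZ book page.
# ============================================================
if __name__ == "__main__":
    info = InitService.execute("https://example.com/book/12345.html")
    print(info["book_id"], info["status"])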