# ============================================================
# File: scraper/services/init_service.py
# Purpose:
#   Orchestrate INIT-flow:
#     - resolve site
#     - fetch minimal metadata
#     - derive book_id
#     - register in SQLite
#     - store main cover
# ============================================================

import re

from scraper.services.site_resolver import SiteResolver
from scraper.services.scrape_engine import ScrapeEngine
from scraper.services.cover_service import CoverService
from db.repository import register_book


class InitService:
    """Entry points for the INIT-flow of a single book URL."""

    @staticmethod
    def derive_book_id(url: str) -> str:
        """
        Derive the book identifier from a book URL.

        A URL that ends with ``/{digits}.html`` (the PTWXZ URL format)
        yields the digit run as the id. Any other URL yields the URL
        itself with every ``/`` replaced by ``_``; no other character is
        altered in that fallback.
        """
        id_match = re.search(r"/(\d+)\.html$", url)
        if id_match is None:
            # Fallback: only slashes are rewritten.
            return url.replace("/", "_")
        return id_match.group(1)

    @staticmethod
    def execute(url: str) -> dict:
        """
        Run the INIT-flow for ``url`` and return the data for the UI.

        Steps, in order: resolve the site, fetch metadata only (no
        chapters), derive the book id, register the book, download the
        main cover.

        Returns a dict with the keys ``book_id``, ``title``, ``author``,
        ``description``, ``cover_url`` and ``status`` (always
        ``"registered"``). ``title`` falls back to ``"Unknown"`` when the
        metadata has no truthy title; the other metadata fields are passed
        through as returned by ``meta.get`` (``None`` when absent).
        """
        # 1) Pick the site implementation that applies to this URL.
        resolved_site = SiteResolver.resolve(url)

        # 2) Metadata only -- chapters are not fetched here.
        meta = ScrapeEngine.fetch_metadata_only(resolved_site, url)
        book_title = meta.get("title") or "Unknown"
        book_author = meta.get("author")
        book_description = meta.get("description")
        main_cover_url = meta.get("cover_url")

        # 3) Identifier under which the book is registered.
        # Key order below is also the key order of the returned dict.
        record = {
            "book_id": InitService.derive_book_id(url),
            "title": book_title,
            "author": book_author,
            "description": book_description,
            "cover_url": main_cover_url,
        }

        # 4) Registration; the record keys match register_book's keyword names.
        register_book(**record)

        # 5) Download the cover shown in the UI.
        CoverService.download_main_cover(main_cover_url, record["book_id"])

        # 6) Structured output for the UI.
        return {**record, "status": "registered"}