# ============================================================
# File: scraper/services/init_service.py
# Purpose:
#   Orchestrate INIT-flow:
#   - resolve site
#   - fetch minimal metadata
#   - derive book_idx
#   - register in SQLite
#   - store main cover
# ============================================================

import re

from scraper.services.site_resolver import SiteResolver
from scraper.services.scrape_engine import ScrapeEngine
from scraper.services.cover_service import CoverService
from db.repository import register_book
from scraper.logger_decorators import logcall


class InitService:
    """Stateless orchestrator for the INIT flow of a single book URL."""

    # ------------------------------------------------------------
    # BOOK IDX DERIVATION
    # ------------------------------------------------------------
    @staticmethod
    @logcall
    def derive_book_id(url: str) -> str:
        """
        Derive the book_idx string from a book URL.

        PTWXZ URLs end with ``/{id}.html``; when that pattern is present
        the numeric id is returned. Any other URL is turned into a
        deterministic identifier by replacing every "/" and ":" with "_".

        Returns:
            book_idx (string)
        """
        numeric_tail = re.search(r"/(\d+)\.html$", url)
        if numeric_tail is None:
            # Unknown URL format: fall back to the URL itself with its
            # path/scheme separators replaced, so the same URL always
            # maps to the same ID.
            return url.translate(str.maketrans({"/": "_", ":": "_"}))
        return numeric_tail.group(1)

    # ------------------------------------------------------------
    # MAIN INIT FLOW
    # ------------------------------------------------------------
    @staticmethod
    @logcall
    def execute(url: str) -> dict:
        """
        INIT entry point.

        Resolves the site handler for ``url``, derives the book_idx,
        fetches the initial metadata, downloads the main cover and
        registers the book through ``register_book``.

        Returns:
            dict with the keys book_idx, title, author, description,
            cover_url, cover_path and status ("registered").
        """
        # 1) Resolve the site handler and 2) derive the unified book_idx.
        site = SiteResolver.resolve(url)
        book_idx = InitService.derive_book_id(url)

        # Legacy compatibility: some site objects historically expect a
        # .book_id attribute. It is set here, but this flow never reads it.
        site.book_id = book_idx

        # 3) Fetch initial metadata (title/author/description/cover).
        meta = ScrapeEngine.fetch_metadata_only(site, url)
        record = {
            "book_idx": book_idx,
            "title": meta.get("title") or "Unknown",
            "author": meta.get("author"),
            "description": meta.get("description"),
            "cover_url": meta.get("cover_url"),
        }

        # 4) Download & store the main cover for the UI.
        record["cover_path"] = CoverService.download_main_cover(
            record["cover_url"], book_idx
        )

        # 5) Register in SQLite (book_idx is the SOLE primary ID).
        register_book(**record, book_url=url)

        # 6) Return metadata for UI / API.
        return {**record, "status": "registered"}