kmftools/bookscraper/scraper/book_scraper.py

# ============================================================
# File: scraper/book_scraper.py
# Purpose:
# Backwards-compatible wrapper giving the SAME public API
# as the old BookScraper, but internally uses ScrapeEngine.
#
# execute() → full metadata + chapter list (NO book_idx creation)
#
# ID management is now handled exclusively by InitService.
# ============================================================
from scraper.logger_decorators import logcall
from scraper.services.scrape_engine import ScrapeEngine


class BookScraper:
    """
    Backwards-compatible BookScraper façade.

    Old responsibilities (metadata, chapters, covers, downloads)
    are now split:

        ScrapeEngine   → metadata + chapter list
        Download tasks → handle download/parse/save
        InitService    → determines book_idx (single source of truth)

    This wrapper intentionally does NOT generate a book_idx or book_id.
    It only returns metadata/chapters in the legacy-compatible dict format.
    """

    @logcall
    def __init__(self, site_scraper, url: str):
        self.site = site_scraper
        self.url = url

    @logcall
    def execute(self):
        """
        Legacy public API:
        Return metadata + chapter list EXACTLY as before,
        but without generating any book_id.
        """
        data = ScrapeEngine.fetch_metadata_and_chapters(self.site, self.url)

        # Legacy structure preserved, unchanged:
        return {
            "title": data.get("title"),
            "author": data.get("author"),
            "description": data.get("description"),
            "cover_url": data.get("cover_url"),
            "chapters": data.get("chapters", []),
            "chapters_total": data.get("chapters_total", 0),
            "book_url": data.get("book_url"),  # used later by parse/save tasks
        }