# ============================================================
# File: scraper/book_scraper.py
#
# Purpose:
#   Backwards-compatible wrapper that exposes the SAME public
#   API as the old BookScraper but internally uses ScrapeEngine.
#
#   execute() → full metadata + chapter list (NO book_idx creation)
#
#   ID management is now handled exclusively by InitService.
# ============================================================

from scraper.logger_decorators import logcall
from scraper.services.scrape_engine import ScrapeEngine
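
# For reference, a sketch of the ScrapeEngine contract this wrapper
# depends on. The authoritative signature lives in
# scraper/services/scrape_engine.py; the key list below is inferred
# from the legacy dict built in execute(), not independently verified:
#
#     data = ScrapeEngine.fetch_metadata_and_chapters(site_scraper, url)
#     # -> dict with keys: "title", "author", "description", "cover_url",
#     #    "chapters" (list), "chapters_total" (int), "book_url"
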
class BookScraper:
    """
    Backwards-compatible BookScraper façade.

    Old responsibilities (metadata, chapters, covers, downloads)
    are now split:

        ScrapeEngine   → metadata + chapter list
        Download tasks → handle download/parse/save
        InitService    → determines book_idx (single source of truth)

    This wrapper intentionally does NOT generate a book_idx or book_id.
    It only returns metadata/chapters in the legacy-compatible dict format.
    """

    @logcall
    def __init__(self, site_scraper, url: str):
        self.site = site_scraper
        self.url = url

    @logcall
    def execute(self):
        """
        Legacy public API:
        Return metadata + chapter list EXACTLY as before,
        but without generating any book_id.
        """
        data = ScrapeEngine.fetch_metadata_and_chapters(self.site, self.url)

        # Legacy structure preserved, unchanged:
        return {
            "title": data.get("title"),
            "author": data.get("author"),
            "description": data.get("description"),
            "cover_url": data.get("cover_url"),
            "chapters": data.get("chapters", []),
            "chapters_total": data.get("chapters_total", 0),
            "book_url": data.get("book_url"),  # used later by parse/save tasks
        }
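
# ------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module's API).
# "ExampleSiteScraper" and its import path are hypothetical
# stand-ins for whatever concrete site scraper the caller has;
# only BookScraper itself is defined in this file.
# ------------------------------------------------------------
if __name__ == "__main__":
    from scraper.sites.example_site import ExampleSiteScraper  # hypothetical

    book = BookScraper(ExampleSiteScraper(), "https://example.com/book/123")
    result = book.execute()

    # Old call sites keep working because the legacy dict shape is preserved:
    print(result["title"], "by", result["author"])
    print(result["chapters_total"], "chapters found")
    # Note there is deliberately no "book_idx"/"book_id" key here;
    # InitService assigns IDs later.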