# ============================================================
# File: scraper/book_scraper.py
# Purpose:
#   Backwards-compatible wrapper exposing the SAME public API
#   as the old BookScraper, but internally using ScrapeEngine.
#
#   execute() → full metadata + chapter list
#
#   (Chapter downloading will be added to ScrapeEngine later,
#   but this wrapper will NOT need to change.)
# ============================================================
from scraper.logger_decorators import logcall
from scraper.services.scrape_engine import ScrapeEngine


class BookScraper:
    """
    Backwards-compatible BookScraper façade.

    In the old system, BookScraper did EVERYTHING:
      - fetch metadata
      - fetch the cover
      - build the chapter list
      - download chapters
      - create volume folders
      - skip logic

    In the new system that work is split up:
      ScrapeEngine → metadata / chapter list / download engine (in development)
      BookScraper  → keeps the same API as before

    As a result, Celery tasks and older modules keep working
    without refactor chaos.
    """
    @logcall
    def __init__(self, site_scraper, url: str):
        self.site = site_scraper
        self.url = url
    @logcall
    def execute(self):
        """
        Public legacy API.

        Returns metadata + chapters EXACTLY as the old BookScraper
        did before the download phase.

        This matters because:
          - the INIT flow uses metadata only
          - scraping tasks use the chapter list
        """
        data = ScrapeEngine.fetch_metadata_and_chapters(self.site, self.url)

        # Fully replicate the legacy output structure:
        return {
            "title": data.get("title"),
            "author": data.get("author"),
            "description": data.get("description"),
            "cover_url": data.get("cover_url"),
            "chapters": data.get("chapters", []),
            "chapters_total": data.get("chapters_total", 0),
            "book_url": data.get("book_url"),
        }
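

# ------------------------------------------------------------
# Usage sketch (illustrative only): how a caller such as a
# Celery task might drive this façade. The site scraper class
# named below is hypothetical; any site scraper object that
# ScrapeEngine.fetch_metadata_and_chapters accepts will work.
#
#   from scraper.sites.example_site import ExampleSiteScraper  # hypothetical
#
#   site = ExampleSiteScraper()
#   book = BookScraper(site, "https://example.com/book/123")
#   result = book.execute()
#   print(result["title"], result["chapters_total"])
# ------------------------------------------------------------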