You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
67 lines
2.0 KiB
67 lines
2.0 KiB
# ============================================================
|
|
# File: scraper/book_scraper.py
|
|
# Purpose:
|
|
# Backwards-compatible wrapper giving the SAME public API
|
|
# as the old BookScraper, but internally uses ScrapeEngine.
|
|
#
|
|
# execute() → full metadata + chapterlist
|
|
#
|
|
# (* Chapter downloading will be added to ScrapeEngine later,
|
|
# but this wrapper will NOT need to be changed.)
|
|
# ============================================================
|
|
|
|
from scraper.logger_decorators import logcall
|
|
from scraper.services.scrape_engine import ScrapeEngine
|
|
|
|
|
|
class BookScraper:
    """Backwards-compatible BookScraper façade.

    In the old system BookScraper did EVERYTHING:
        - fetching metadata
        - fetching the cover
        - building the chapter list
        - downloading chapters
        - volume folders
        - skip logic

    In the new system that work is split up:

        ScrapeEngine → metadata / chapter list / download engine
                       (in development)
        BookScraper  → keeps the same API as before

    This lets Celery tasks and older modules keep working without
    refactor chaos.
    """

    @logcall
    def __init__(self, site_scraper, url: str):
        # Only remember the inputs; nothing is fetched until execute().
        self.url = url
        self.site = site_scraper

    @logcall
    def execute(self):
        """Public legacy API.

        Returns metadata + chapters EXACTLY like the old BookScraper did
        before the download phase.

        This matters because:
            - the INIT flow uses the metadata only
            - scraping tasks use the chapter list
        """
        scraped = ScrapeEngine.fetch_metadata_and_chapters(self.site, self.url)

        # Rebuild the legacy output structure key by key, in the same
        # order the old dict literal used.
        legacy = {}
        for field in ("title", "author", "description", "cover_url"):
            legacy[field] = scraped.get(field)

        # These two fall back to an empty list / zero when absent.
        legacy["chapters"] = scraped.get("chapters", [])
        legacy["chapters_total"] = scraped.get("chapters_total", 0)

        legacy["book_url"] = scraped.get("book_url")
        return legacy
|