# ============================================================
# File: scraper/engine/fetcher.py
# Purpose:
#   Low-level HTML fetch utility shared by all site scrapers.
#   Replaces scattered _fetch() logic inside BookScraper.
# ============================================================

import requests
from bs4 import BeautifulSoup

# Request headers sent with every fetch so all scrapers present the same
# browser-like user-agent string.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:118.0) "
        "Gecko/20100101 Firefox/118.0"
    )
}


def fetch_html(url: str, encoding: str = "utf-8", timeout: int = 10) -> BeautifulSoup:
    """Download *url* and return its body parsed with the lxml parser.

    The request is a GET carrying the module-level ``HEADERS``. The
    response text is decoded using ``encoding`` regardless of what the
    server declared.

    Args:
        url: Address of the page to download.
        encoding: Character encoding forced onto the response before
            its text is read.
        timeout: Value passed to ``requests.get`` as its ``timeout``.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``"lxml"`` parser.

    Note:
        The HTTP status code is not inspected here, so an error page is
        parsed and returned like any other response. Exceptions raised
        by ``requests.get`` (e.g. on timeout) propagate to the caller.
    """
    response = requests.get(url, timeout=timeout, headers=HEADERS)

    # Override the detected encoding *before* touching ``.text`` so the
    # body is decoded with the caller's choice.
    response.encoding = encoding
    markup = response.text

    return BeautifulSoup(markup, "lxml")