You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kmftools/bookscraper/scraper/engine/fetcher.py

28 lines
818 B

# ============================================================
# File: scraper/engine/fetcher.py
# Purpose:
# Low-level HTML fetch utility shared by all site scrapers.
# Replaces scattered _fetch() logic inside BookScraper.
# ============================================================
import requests
from bs4 import BeautifulSoup
# Default request headers used by fetch_html(): a single fixed
# desktop-Firefox User-Agent string.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 "
        "(Macintosh; Intel Mac OS X 10.15; rv:118.0) "
        "Gecko/20100101 Firefox/118.0"
    ),
}
def fetch_html(url: str, encoding: str = "utf-8", timeout: int = 10) -> BeautifulSoup:
    """Download *url* and return its body parsed into a BeautifulSoup tree.

    The GET request is sent with the module-level ``HEADERS`` and the
    given *timeout* (passed straight through to ``requests.get``).

    Before the body is decoded, the response's encoding is overwritten
    with *encoding*, so the text is always decoded with the caller's
    choice (``"utf-8"`` by default).

    The HTTP status code is not inspected here: whatever body comes back
    is decoded and parsed with the ``lxml`` parser.  Any exception raised
    by ``requests.get`` propagates to the caller unchanged.
    """
    response = requests.get(url, timeout=timeout, headers=HEADERS)

    # Must be set before reading ``.text`` so decoding uses this encoding.
    response.encoding = encoding
    markup = response.text

    return BeautifulSoup(markup, "lxml")