You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kmftools/bookscraper/scraper/sites/base.py

52 lines
1.4 KiB

# ============================================================
# File: scraper/sites/base.py
# Purpose:
# Abstract interface that every site-specific scraper must implement.
# ============================================================
from abc import ABC, abstractmethod
from bs4 import BeautifulSoup
class SiteScraper(ABC):
    """
    Contract shared by all site-specific scrapers.

    Every concrete scraper (Piaotian, Biquge, etc.) has to provide each of
    the abstract properties and methods declared below; the class cannot be
    instantiated until all of them are overridden.
    """

    # --------------------------
    # Site configuration
    # --------------------------
    @property
    @abstractmethod
    def root(self) -> str:
        """Root of the site as a string (presumably its base URL; confirm against implementations)."""

    @property
    @abstractmethod
    def encoding(self) -> str:
        """Encoding name for the site (presumably the charset of its pages; confirm against implementations)."""

    @property
    @abstractmethod
    def chapter_list_selector(self) -> str:
        """Selector string locating the chapter list (selector syntax is defined by the caller; not visible here)."""

    # --------------------------
    # Metadata extraction
    # --------------------------
    @abstractmethod
    def parse_title(self, soup: BeautifulSoup) -> str:
        """Return the title extracted from the parsed page ``soup``."""

    @abstractmethod
    def parse_author(self, soup: BeautifulSoup) -> str:
        """Return the author extracted from the parsed page ``soup``."""

    @abstractmethod
    def parse_description(self, soup: BeautifulSoup) -> str:
        """Return the description extracted from the parsed page ``soup``."""

    @abstractmethod
    def parse_cover(self, soup: BeautifulSoup, url: str) -> str | None:
        """
        Return the cover as a string, or ``None`` when there is none.

        ``url`` is presumably the address of the page behind ``soup``
        (e.g. for resolving relative links) -- TODO confirm with callers.
        """

    # --------------------------
    # Chapter extraction
    # --------------------------
    @abstractmethod
    def extract_chapter_page_url(self, soup: BeautifulSoup) -> str:
        """Return the URL of the chapter page found in the parsed page ``soup``."""

    @abstractmethod
    def parse_chapter_list(self, soup: BeautifulSoup) -> list:
        """Return the chapters found in ``soup`` as a list (element type is not specified by this interface)."""