You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
52 lines
1.4 KiB
52 lines
1.4 KiB
# ============================================================
# File: scraper/sites/base.py
# Purpose:
#   Abstract interface that every site-specific scraper must implement.
# ============================================================
|
|
|
|
from abc import ABC, abstractmethod
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
class SiteScraper(ABC):
|
|
"""
|
|
Defines the interface for site-specific scrapers.
|
|
Each concrete scraper (Piaotian, Biquge, etc.) must implement these.
|
|
"""
|
|
|
|
@property
|
|
@abstractmethod
|
|
def root(self) -> str: ...
|
|
|
|
@property
|
|
@abstractmethod
|
|
def encoding(self) -> str: ...
|
|
|
|
@property
|
|
@abstractmethod
|
|
def chapter_list_selector(self) -> str: ...
|
|
|
|
# --------------------------
|
|
# Metadata extraction
|
|
# --------------------------
|
|
@abstractmethod
|
|
def parse_title(self, soup: BeautifulSoup) -> str: ...
|
|
|
|
@abstractmethod
|
|
def parse_author(self, soup: BeautifulSoup) -> str: ...
|
|
|
|
@abstractmethod
|
|
def parse_description(self, soup: BeautifulSoup) -> str: ...
|
|
|
|
@abstractmethod
|
|
def parse_cover(self, soup: BeautifulSoup, url: str) -> str | None: ...
|
|
|
|
# --------------------------
|
|
# Chapter extraction
|
|
# --------------------------
|
|
@abstractmethod
|
|
def extract_chapter_page_url(self, soup: BeautifulSoup) -> str: ...
|
|
|
|
@abstractmethod
|
|
def parse_chapter_list(self, soup: BeautifulSoup) -> list: ...
|