# scraper/tasks/scraping.py
import os

from celery_app import celery_app
from logbus.publisher import log
from scraper.book_scraper import BookScraper
from scraper.sites import BookSite

print(">>> [IMPORT] scraping.py loaded")


@celery_app.task(bind=True, queue="scraping", ignore_result=False)
def start_scrape_book(self, url: str):
    """Scrape metadata + the chapter list using the new BookScraper.execute()."""
    log(f"[SCRAPING] Start scraping for: {url}")

    site = BookSite()
    scraper = BookScraper(site, url)

    # ----------------------------------------
    # NEW API (old: scraper.parse_book_info())
    # ----------------------------------------
    result = scraper.execute()
    chapters = result.get("chapters", [])
    full_count = len(chapters)

    # ----------------------------------------
    # DRY RUN logic
    # ----------------------------------------
    DRY_RUN = os.getenv("DRY_RUN", "0") == "1"
    TEST_LIMIT = int(os.getenv("TEST_LIMIT", "5"))
    if DRY_RUN:
        log(f"[SCRAPING] DRY_RUN: limiting chapters to {TEST_LIMIT}")
        chapters = chapters[:TEST_LIMIT]
        result["chapters"] = chapters

    log(f"[SCRAPING] Completed scrape: {len(chapters)}/{full_count} chapters")

    # ----------------------------------------
    # Dispatch download pipelines
    # ----------------------------------------
    celery_app.send_task(
        "scraper.tasks.controller_tasks.launch_downloads",
        args=[result],
        queue="controller",
    )

    return result
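
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): one way a caller could
# enqueue this task and wait for the scraped result. `delay()` and
# `AsyncResult.get()` are standard Celery APIs; the example URL and the
# timeout value are placeholders, and `result` is assumed to be a dict
# carrying at least a "chapters" list (per `result.get("chapters", [])`
# above).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Requires a running broker and a worker consuming the "scraping" queue.
    async_result = start_scrape_book.delay("https://example.com/book/123")

    # The task is declared with ignore_result=False, so its return value can
    # be fetched back from the configured result backend.
    book = async_result.get(timeout=300)
    print(f"Scraped {len(book.get('chapters', []))} chapters")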