# ============================================================
# File: scraper/tasks/progress_tasks.py
# Purpose: Central progress updater for chapter pipelines.
# Updated for chapter_dict pipeline model.
# ============================================================

from celery_app import celery_app
from scraper.progress import inc_completed, inc_skipped, inc_failed
from logbus.publisher import log

print(">>> [IMPORT] progress_tasks.py loaded")


@celery_app.task(bind=False, name="progress.update", queue="controller")
def update_progress(result: dict, book_id: str):
    """Log the outcome of one chapter and record failures for a book.

    Runs as the Celery task ``progress.update`` on the ``controller`` queue.

    Args:
        result: Output of the save_chapter step. Read keys are
            ``"chapter"`` (a mapping with a ``"num"`` entry, may be missing
            or empty), ``"skipped"`` and ``"failed"`` (both default to
            False when absent).
        book_id: Identifier of the book, passed explicitly by the pipeline.

    Returns:
        The ``result`` mapping, unchanged, so it can flow to the next
        pipeline step.

    Note:
        Per the pipeline contract, save_chapter has already bumped the
        skipped/completed counters itself. To avoid double counting, the
        only counter touched here is the failure counter.
    """
    chapter_info = result.get("chapter") or {}
    number = chapter_info.get("num")

    if result.get("failed", False):
        # Failure takes precedence over every other flag and is the only
        # outcome counted by this task.
        inc_failed(book_id)
        log(f"[PROG] FAILED chapter {number}")
    elif result.get("skipped", False):
        # The skipped counter is expected to be incremented upstream
        # (save_chapter); only report it here.
        log(f"[PROG] SKIPPED chapter {number}")
    else:
        # Normal completion: the completed counter is expected to be
        # incremented upstream (save_chapter); only report it here.
        log(f"[PROG] DONE chapter {number}")

    return result