parent
6154b396e3
commit
e0695cf216
@ -1 +1,4 @@
|
||||
output/
venv/
# virtualenv directory created by the local audio worker start script
.venv/
*.log
__pycache__/
|
||||
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Local macOS Audio Worker — runs outside Docker so macOS 'say' works.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Directory that holds this script; the optional .env file sits next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ENV_FILE = os.path.join(BASE_DIR, ".env")

# Read settings from the .env file when there is one, otherwise only warn.
if not os.path.exists(ENV_FILE):
    print("[AUDIO-LOCAL] WARNING: no .env found")
else:
    load_dotenv(ENV_FILE)
    print(f"[AUDIO-LOCAL] Loaded .env from {ENV_FILE}")
|
||||
|
||||
|
||||
def main():
    """Print a startup banner, point Celery at the local Redis and run the worker.

    The broker and result backend are taken from ``REDIS_BROKER_LOCAL`` and
    ``REDIS_BACKEND_LOCAL`` (defaulting to Redis on 127.0.0.1) and exported as
    ``CELERY_BROKER_URL`` / ``CELERY_RESULT_BACKEND`` before Celery starts, so
    the child process inherits them.

    Returns:
        int: exit code of the Celery worker process.
    """
    print("=====================================================")
    print(" LOCAL macOS AUDIO WORKER")
    print(" Queue : audio")
    print(" Voice :", os.getenv("AUDIO_VOICE"))
    print(" Rate :", os.getenv("AUDIO_RATE"))
    print("=====================================================")

    # ----------------------------------------------------------
    # OVERRIDES: Local Redis instead of Docker internal hostname
    # ----------------------------------------------------------
    broker = os.getenv("REDIS_BROKER_LOCAL", "redis://127.0.0.1:6379/0")
    backend = os.getenv("REDIS_BACKEND_LOCAL", "redis://127.0.0.1:6379/1")

    os.environ["CELERY_BROKER_URL"] = broker
    os.environ["CELERY_RESULT_BACKEND"] = backend

    print(f"[AUDIO-LOCAL] Using Redis broker : {broker}")
    print(f"[AUDIO-LOCAL] Using Redis backend: {backend}")

    # ----------------------------------------------------------
    # Celery command
    # The prefork pool is selected explicitly for macOS. Arguments are
    # passed as a list, so no shell is involved.
    # ----------------------------------------------------------
    cmd = [
        "celery",
        "-A",
        "celery_app",
        "worker",
        "-Q",
        "audio",
        "-n",
        "audio_local@%h",
        "-l",
        "INFO",
        "--pool=prefork",
    ]

    print("[AUDIO-LOCAL] Launching Celery via subprocess…")

    # check=False: a non-zero exit is reported through the return value
    # instead of an exception, so the caller decides what to do with it.
    completed = subprocess.run(cmd, check=False)
    return completed.returncode


if __name__ == "__main__":
    # Propagate the worker's exit status so wrapper scripts can detect failure.
    raise SystemExit(main())
|
||||
@ -1,9 +1,15 @@
|
||||
FROM python:3.12-slim
WORKDIR /app

# Install audio worker dependencies
COPY requirements.audio.txt /app/requirements.audio.txt
RUN pip install --no-cache-dir -r /app/requirements.audio.txt

# Celery is required for the worker
RUN pip install --no-cache-dir celery

# Copy project
COPY . /app

# Start the AUDIO Celery worker.
# Only the last CMD in a Dockerfile takes effect, so this is the single CMD;
# the placeholder "print('audio worker ready')" CMD was dead and is removed.
CMD ["celery", "-A", "celery_app", "worker", "-Q", "audio", "-n", "audio@%h", "-l", "INFO"]
|
||||
|
||||
@ -1,10 +0,0 @@
|
||||
# tasks/audio.py
|
||||
from celery import shared_task
|
||||
from logbus.publisher import log
|
||||
|
||||
|
||||
@shared_task(bind=True, queue="audio")
def text_to_audio(self, text_file):
    """Placeholder audio task: log the file to be converted and report success.

    No audio is generated here; the macOS "say" invocation is not implemented
    in this task.

    Args:
        text_file: value identifying the text to convert (only logged).

    Returns:
        Always ``True``.
    """
    message = f"[AUDIO] converting: {text_file}"
    log(message)
    # placeholder for macOS "say"
    return True
|
||||
@ -0,0 +1,183 @@
|
||||
# ============================================================
|
||||
# File: scraper/tasks/audio_tasks.py
|
||||
# ============================================================
|
||||
|
||||
from celery_app import celery_app
|
||||
from logbus.publisher import log
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from scraper.abort import abort_requested
|
||||
from redis import Redis
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Use the local Redis when configured, otherwise the standard backend.
# If neither variable is set, fall back to the URL the local worker launcher
# uses as its backend default, instead of crashing on an unparsable None.
redis_url = (
    os.getenv("REDIS_BACKEND_LOCAL")
    or os.getenv("REDIS_BACKEND")
    or "redis://127.0.0.1:6379/1"
)

parsed = urlparse(redis_url)

# A URL may omit host or port (e.g. "redis://localhost"); use Redis defaults.
_redis_host = parsed.hostname or "127.0.0.1"
_redis_port = parsed.port or 6379

# The URL path carries the database number ("/1"); empty or invalid means DB 0.
_redis_db_text = parsed.path.strip("/")
_redis_db = int(_redis_db_text) if _redis_db_text.isdigit() else 0

# ------------------------------------------------------------
# REGULAR REDIS CLIENT (slots, file checks, state)
# ------------------------------------------------------------
redis_client = Redis(
    host=_redis_host,
    port=_redis_port,
    db=_redis_db,
    password=parsed.password,  # None when the URL carries no credentials
)

# ------------------------------------------------------------
# BACKEND CLIENT (abort flags, progress counters) - always DB 0
# ------------------------------------------------------------
backend_client = Redis(
    host=_redis_host,
    port=_redis_port,
    db=0,
    password=parsed.password,
)
|
||||
|
||||
# Tuning knobs for the audio task, each overridable through the environment.

# Upper bound, in seconds, passed as the subprocess timeout for one chapter.
AUDIO_TIMEOUT = int(os.environ.get("AUDIO_TIMEOUT_SECONDS", "300"))

# Voice name and speaking rate handed to the "say" command.
AUDIO_VOICE = os.environ.get("AUDIO_VOICE", "SinJi")
AUDIO_RATE = int(os.environ.get("AUDIO_RATE", "200"))

# Root directory under which input text is resolved and audio is written.
HOST_PATH = os.environ.get("HOST_PATH", "/app/output")

# Number of "audio_slot:<n>" keys, i.e. how many chapters may render at once.
AUDIO_SLOTS = int(os.environ.get("AUDIO_SLOTS", "1"))

# Path prefix stripped from incoming chapter paths before joining with HOST_PATH.
CONTAINER_PREFIX = os.environ.get("BOOKSCRAPER_OUTPUT_DIR", "/app/output")
|
||||
|
||||
|
||||
def _try_acquire_slot(ttl):
    """Make one pass over all audio slots and try to claim a free one.

    Returns:
        ``(key, index)`` of the claimed slot, or ``None`` when every slot is
        currently taken.
    """
    for index in range(1, AUDIO_SLOTS + 1):
        key = f"audio_slot:{index}"
        # SET with nx + ex claims the key only if it does not exist yet and
        # gives it an expiry, so a slot whose holder died frees itself.
        if redis_client.set(key, "1", nx=True, ex=ttl):
            return key, index
    return None


def _acquire_audio_slot(book_id, chapter_number, ttl):
    """Wait for a free audio slot.

    Returns:
        The Redis key of the claimed slot, or ``None`` when an abort was
        requested for the book or no slot became free within ``ttl`` seconds.
    """
    claimed = _try_acquire_slot(ttl)
    if claimed is not None:
        key, index = claimed
        log(f"[AUDIO] CH{chapter_number}: Acquired slot {index}/{AUDIO_SLOTS}")
        return key

    log(f"[AUDIO] CH{chapter_number}: All slots busy → waiting...")
    # Monotonic clock: the wait limit must not be affected by wall-clock jumps.
    start_wait = time.monotonic()

    while True:
        claimed = _try_acquire_slot(ttl)
        if claimed is not None:
            log(f"[AUDIO] CH{chapter_number}: Slot acquired after wait")
            return claimed[0]

        if abort_requested(book_id, backend_client):
            log(f"[AUDIO] ABORT while waiting → skip CH{chapter_number}")
            return None

        if time.monotonic() - start_wait > ttl:
            log(f"[AUDIO] CH{chapter_number}: Slot wait timeout → aborting audio")
            return None

        time.sleep(0.25)


def _remove_partial_output(audio_file, chapter_number):
    """Delete a possibly incomplete output file so a later run is not skipped."""
    try:
        os.remove(audio_file)
    except FileNotFoundError:
        pass  # nothing was written, nothing to clean up
    except OSError as exc:
        log(f"[AUDIO] CH{chapter_number}: could not remove {audio_file} → {exc}")


def _render_chapter(chapter_number, container_path):
    """Resolve the chapter's paths and run macOS ``say`` to produce the audio file."""
    log(f"[AUDIO] CH{chapter_number}: container_path={container_path}")

    # 1) Strip container prefix to get relative path: BOOK/VOLUME/FILE
    if container_path.startswith(CONTAINER_PREFIX):
        relative_path = container_path[len(CONTAINER_PREFIX):].lstrip("/")
    else:
        relative_path = container_path  # fallback

    parts = relative_path.split("/")
    if len(parts) < 3:
        log(
            f"[AUDIO] CH{chapter_number}: FATAL — cannot parse book/volume from {relative_path}"
        )
        return

    book_from_path, volume_from_path = parts[0], parts[1]

    # 2) Construct real host path
    host_path = os.path.join(HOST_PATH, relative_path)
    log(f"[AUDIO] CH{chapter_number}: resolved_host_path={host_path}")

    # Output goes to <HOST_PATH>/<book>/<volume>/Audio/<NNNN>.m4a
    base_dir = os.path.join(HOST_PATH, book_from_path, volume_from_path, "Audio")
    os.makedirs(base_dir, exist_ok=True)

    safe_num = f"{chapter_number:04d}"
    audio_file = os.path.join(base_dir, f"{safe_num}.m4a")
    log(f"[AUDIO] CH{chapter_number}: output_file={audio_file}")

    if os.path.exists(audio_file):
        log(f"[AUDIO] Skip CH{chapter_number} → already exists")
        return

    # Argument list instead of a shell string: the paths come from task input
    # and may contain quotes or other shell metacharacters.
    cmd = [
        "say",
        f"--voice={AUDIO_VOICE}",
        f"--input-file={host_path}",
        f"--output-file={audio_file}",
        "--file-format=m4bf",
        "--quality=127",
        "-r",
        str(AUDIO_RATE),
        "--data-format=aac",
    ]
    log(f"[AUDIO] CH{chapter_number}: CMD = {' '.join(cmd)}")

    try:
        subprocess.run(cmd, check=True, timeout=AUDIO_TIMEOUT)
        log(f"[AUDIO] CH{chapter_number}: Completed")
    except subprocess.TimeoutExpired:
        log(f"[AUDIO] CH{chapter_number}: TIMEOUT → remove incomplete file")
        _remove_partial_output(audio_file, chapter_number)
    except subprocess.CalledProcessError as e:
        log(f"[AUDIO] CH{chapter_number}: ERROR during say → {e}")
        # A failed run may leave a truncated file that the "already exists"
        # check would otherwise treat as finished on the next attempt.
        _remove_partial_output(audio_file, chapter_number)
    except Exception as e:
        log(f"[AUDIO] CH{chapter_number}: UNEXPECTED ERROR → {e}")
        _remove_partial_output(audio_file, chapter_number)


@celery_app.task(bind=True, queue="audio", ignore_result=True)
def generate_audio(
    self, book_id, volume_name, chapter_number, chapter_title, chapter_text
):
    """Convert one chapter's text file to an audio file with macOS ``say``.

    Concurrency is limited through ``AUDIO_SLOTS`` Redis keys
    (``audio_slot:<n>``). The task claims one before rendering and always
    releases it afterwards, including when rendering raises.

    Args:
        book_id: identifier passed to ``abort_requested`` to check for aborts.
        volume_name: unused here; the volume is derived from the file path.
        chapter_number: integer chapter number, used for logging and for the
            zero-padded output file name.
        chapter_title: unused here.
        chapter_text: despite the name, the path of the chapter's text file;
            ``CONTAINER_PREFIX`` is stripped and the rest is resolved under
            ``HOST_PATH``.

    Returns:
        None. Outcomes (skip, abort, timeout, errors) are reported via ``log``.
    """
    log(f"[AUDIO] CH{chapter_number}: START task → raw_input={chapter_text}")

    # Abort early
    if abort_requested(book_id, backend_client):
        log(f"[AUDIO] ABORT detected → skip CH{chapter_number}")
        return

    # Slot expiry slightly exceeds the render timeout so a live render never
    # loses its slot, while a dead worker's slot still frees itself.
    ttl = AUDIO_TIMEOUT + 15
    slot_key = _acquire_audio_slot(book_id, chapter_number, ttl)
    if slot_key is None:
        return

    try:
        _render_chapter(chapter_number, chapter_text)
    finally:
        # Release on every exit path, including unexpected exceptions raised
        # before the TTS command starts (e.g. while creating the output dir).
        redis_client.delete(slot_key)
        log(f"[AUDIO] CH{chapter_number}: Released slot")
|
||||
@ -0,0 +1,46 @@
|
||||
#!/bin/bash
# Bootstrap and launch the local macOS audio worker:
#   1. make sure a .venv virtualenv exists and activate it,
#   2. install the audio requirements plus Celery into it,
#   3. start audio_worker_local.py.
# Any failing command stops the script (set -e).
set -e

printf '\n%s\n%s\n%s\n\n' \
    "=====================================================" \
    " STARTING LOCAL macOS AUDIO WORKER" \
    "====================================================="

# ------------------------------------------------------
# Virtualenv: reuse .venv when present, otherwise create it
# ------------------------------------------------------
if [ -d ".venv" ]; then
    echo "[AUDIO] Existing .venv found"
else
    echo "[AUDIO] No .venv found — creating virtualenv..."
    python3 -m venv .venv
fi

echo "[AUDIO] Activating .venv"
. .venv/bin/activate

# ------------------------------------------------------
# Dependencies
# ------------------------------------------------------
REQ="requirements.audio.txt"

# Bail out before touching pip when the requirements file is missing.
[ -f "$REQ" ] || {
    echo "[AUDIO] ERROR — $REQ not found!"
    exit 1
}

echo "[AUDIO] Installing audio requirements..."
pip install -r "$REQ"

# Celery is needed by the local worker as well.
echo "[AUDIO] Ensuring Celery installed..."
pip install celery

# ------------------------------------------------------
# Launch
# ------------------------------------------------------
printf '\n%s\n' "[AUDIO] Starting audio worker..."
python3 audio_worker_local.py
|
||||
Loading…
Reference in new issue